Skip to content

Commit

Permalink
refactor: Consolidate subfield filter definitions (facebookincubator#…
Browse files Browse the repository at this point in the history
…12184)

Summary:
X-link: facebookexperimental/verax#2

Pull Request resolved: facebookincubator#12184

There are multiple definitions of subfield filters in velox and consolidate them in Filter.h

Reviewed By: Yuhta

Differential Revision: D68719054

fbshipit-source-id: 7085c7ad803980d960e32ce4a981bd4a722558e9
  • Loading branch information
xiaoxmeng authored and facebook-github-bot committed Jan 28, 2025
1 parent 9999ae0 commit ce273fa
Show file tree
Hide file tree
Showing 15 changed files with 35 additions and 43 deletions.
6 changes: 3 additions & 3 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ void checkColumnNameLowerCase(const std::shared_ptr<const Type>& type) {
}

void checkColumnNameLowerCase(
const SubfieldFilters& filters,
const common::SubfieldFilters& filters,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>&
infoColumns) {
for (const auto& filterIt : filters) {
Expand Down Expand Up @@ -349,7 +349,7 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
const RowTypePtr& rowType,
const folly::F14FastMap<std::string, std::vector<const common::Subfield*>>&
outputSubfields,
const SubfieldFilters& filters,
const common::SubfieldFilters& filters,
const RowTypePtr& dataColumns,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>&
partitionKeys,
Expand Down Expand Up @@ -837,7 +837,7 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
const core::TypedExprPtr& expr,
core::ExpressionEvaluator* evaluator,
bool negated,
SubfieldFilters& filters,
common::SubfieldFilters& filters,
double& sampleRate) {
auto* call = dynamic_cast<const core::CallTypedExpr*>(expr.get());
if (call == nullptr) {
Expand Down
8 changes: 5 additions & 3 deletions velox/connectors/hive/HiveConnectorUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,17 @@ class HiveTableHandle;
class HiveConfig;
struct HiveConnectorSplit;

#ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY
using SubfieldFilters =
std::unordered_map<common::Subfield, std::unique_ptr<common::Filter>>;
#endif

const std::string& getColumnName(const common::Subfield& subfield);

void checkColumnNameLowerCase(const std::shared_ptr<const Type>& type);

void checkColumnNameLowerCase(
const SubfieldFilters& filters,
const common::SubfieldFilters& filters,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>&
infoColumns);

Expand All @@ -53,7 +55,7 @@ std::shared_ptr<common::ScanSpec> makeScanSpec(
const RowTypePtr& rowType,
const folly::F14FastMap<std::string, std::vector<const common::Subfield*>>&
outputSubfields,
const SubfieldFilters& filters,
const common::SubfieldFilters& filters,
const RowTypePtr& dataColumns,
const std::unordered_map<std::string, std::shared_ptr<HiveColumnHandle>>&
partitionKeys,
Expand Down Expand Up @@ -108,7 +110,7 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
const core::TypedExprPtr& expr,
core::ExpressionEvaluator* evaluator,
bool negated,
SubfieldFilters& filters,
common::SubfieldFilters& filters,
double& sampleRate);

} // namespace facebook::velox::connector::hive
5 changes: 1 addition & 4 deletions velox/connectors/hive/HiveDataSource.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ namespace facebook::velox::connector::hive {

class HiveConfig;

using SubfieldFilters =
std::unordered_map<common::Subfield, std::unique_ptr<common::Filter>>;

class HiveDataSource : public DataSource {
public:
HiveDataSource(
Expand Down Expand Up @@ -162,7 +159,7 @@ class HiveDataSource : public DataSource {
SpecialColumnNames specialColumns_{};
folly::F14FastMap<std::string, std::vector<const common::Subfield*>>
subfields_;
SubfieldFilters filters_;
common::SubfieldFilters filters_;
std::shared_ptr<common::MetadataFilter> metadataFilter_;
std::unique_ptr<exec::ExprSet> remainingFilterExprSet_;
RowVectorPtr emptyOutput_;
Expand Down
4 changes: 2 additions & 2 deletions velox/connectors/hive/TableHandle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ HiveTableHandle::HiveTableHandle(
std::string connectorId,
const std::string& tableName,
bool filterPushdownEnabled,
SubfieldFilters subfieldFilters,
common::SubfieldFilters subfieldFilters,
const core::TypedExprPtr& remainingFilter,
const RowTypePtr& dataColumns,
const std::unordered_map<std::string, std::string>& tableParameters)
Expand Down Expand Up @@ -185,7 +185,7 @@ ConnectorTableHandlePtr HiveTableHandle::create(
ISerializable::deserialize<core::ITypedExpr>(it->second, context);
}

SubfieldFilters subfieldFilters;
common::SubfieldFilters subfieldFilters;
folly::dynamic subfieldFiltersObj = obj["subfieldFilters"];
for (const auto& subfieldFilter : subfieldFiltersObj) {
common::Subfield subfield(subfieldFilter["subfield"].asString());
Expand Down
9 changes: 3 additions & 6 deletions velox/connectors/hive/TableHandle.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@

namespace facebook::velox::connector::hive {

using SubfieldFilters =
std::unordered_map<common::Subfield, std::unique_ptr<common::Filter>>;

class HiveColumnHandle : public ColumnHandle {
public:
enum class ColumnType {
Expand Down Expand Up @@ -126,7 +123,7 @@ class HiveTableHandle : public ConnectorTableHandle {
std::string connectorId,
const std::string& tableName,
bool filterPushdownEnabled,
SubfieldFilters subfieldFilters,
common::SubfieldFilters subfieldFilters,
const core::TypedExprPtr& remainingFilter,
const RowTypePtr& dataColumns = nullptr,
const std::unordered_map<std::string, std::string>& tableParameters = {});
Expand All @@ -143,7 +140,7 @@ class HiveTableHandle : public ConnectorTableHandle {
return filterPushdownEnabled_;
}

const SubfieldFilters& subfieldFilters() const {
const common::SubfieldFilters& subfieldFilters() const {
return subfieldFilters_;
}

Expand Down Expand Up @@ -173,7 +170,7 @@ class HiveTableHandle : public ConnectorTableHandle {
private:
const std::string tableName_;
const bool filterPushdownEnabled_;
const SubfieldFilters subfieldFilters_;
const common::SubfieldFilters subfieldFilters_;
const core::TypedExprPtr remainingFilter_;
const RowTypePtr dataColumns_;
const std::unordered_map<std::string, std::string> tableParameters_;
Expand Down
3 changes: 0 additions & 3 deletions velox/connectors/hive/iceberg/PositionalDeleteFileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@ namespace facebook::velox::connector::hive::iceberg {
struct IcebergDeleteFile;
struct IcebergMetadataColumn;

using SubfieldFilters =
std::unordered_map<common::Subfield, std::unique_ptr<common::Filter>>;

class PositionalDeleteFileReader {
public:
PositionalDeleteFileReader(
Expand Down
4 changes: 2 additions & 2 deletions velox/connectors/hive/tests/HiveConnectorUtilTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) {
"testConnectorId",
"testTable",
false,
hive::SubfieldFilters{},
common::SubfieldFilters{},
nullptr,
nullptr,
tableParameters);
Expand Down Expand Up @@ -305,7 +305,7 @@ TEST_F(HiveConnectorUtilTest, cacheRetention) {
"testConnectorId",
"testTable",
false,
hive::SubfieldFilters{},
common::SubfieldFilters{},
nullptr,
nullptr,
std::unordered_map<std::string, std::string>{});
Expand Down
2 changes: 0 additions & 2 deletions velox/dwio/common/tests/utils/FilterGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ namespace facebook::velox::dwio::common {

using namespace facebook::velox::common;

using SubfieldFilters = std::unordered_map<Subfield, std::unique_ptr<Filter>>;

struct FilterSpec {
FilterSpec() {}

Expand Down
15 changes: 8 additions & 7 deletions velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,7 @@ TEST_F(TableScanTest, missingColumns) {
assertQuery(op, filePaths, "SELECT count(*) FROM tmp WHERE c1 <= 4000.1", 0);

// Use missing column 'c1' in 'is null' filter, while not selecting 'c1'.
SubfieldFilters filters;
common::SubfieldFilters filters;
filters[common::Subfield("c1")] = lessThanOrEqualDouble(1050.0, true);
auto tableHandle = std::make_shared<HiveTableHandle>(
kHiveConnectorId, "tmp", true, std::move(filters), nullptr, dataColumns);
Expand Down Expand Up @@ -1975,7 +1975,7 @@ TEST_F(TableScanTest, partitionedTableDateKey) {
{"c0", regularColumn("c0", BIGINT())},
{"c1", regularColumn("c1", DOUBLE())}};

SubfieldFilters filters;
common::SubfieldFilters filters;
// pkey > 2020-09-01.
filters[common::Subfield("pkey")] = std::make_unique<common::BigintRange>(
18506, std::numeric_limits<int64_t>::max(), false);
Expand Down Expand Up @@ -2015,7 +2015,7 @@ TEST_F(TableScanTest, partitionedTableTimestampKey) {
{"c0", regularColumn("c0", BIGINT())},
{"c1", regularColumn("c1", DOUBLE())}};

SubfieldFilters filters;
common::SubfieldFilters filters;
// pkey = 2023-10-27 00:12:35.
auto lower = util::fromTimestampString(
StringView("2023-10-27 00:12:35"),
Expand Down Expand Up @@ -2709,7 +2709,7 @@ TEST_F(TableScanTest, filterPushdown) {
createDuckDbTable(vectors);

// c1 >= 0 or null and c3 is true
SubfieldFilters subfieldFilters =
common::SubfieldFilters subfieldFilters =
SubfieldFiltersBuilder()
.add("c1", greaterThanOrEqual(0, true))
.add("c3", std::make_unique<common::BoolValue>(true, false))
Expand Down Expand Up @@ -2805,7 +2805,7 @@ TEST_F(TableScanTest, path) {

// use $path in a filter, but don't project it out
auto tableHandle = makeTableHandle(
SubfieldFilters{},
common::SubfieldFilters{},
parseExpr(fmt::format("\"{}\" = '{}'", kPath, pathValue), typeWithPath));
op = PlanBuilder()
.startTableScan()
Expand Down Expand Up @@ -2862,7 +2862,7 @@ TEST_F(TableScanTest, fileSizeAndModifiedTime) {

auto filterTest = [&](const std::string& filter) {
auto tableHandle = makeTableHandle(
SubfieldFilters{},
common::SubfieldFilters{},
parseExpr(filter, allColumns),
"hive_table",
allColumns);
Expand Down Expand Up @@ -5280,7 +5280,8 @@ TEST_F(TableScanTest, rowNumberInRemainingFilter) {
writeToFile(file->getPath(), {vector});
auto outputType = ROW({"c0"}, {BIGINT()});
auto remainingFilter = parseExpr("r1 % 2 == 0", ROW({"r1"}, {BIGINT()}));
auto tableHandle = makeTableHandle(SubfieldFilters{}, remainingFilter);
auto tableHandle =
makeTableHandle(common::SubfieldFilters{}, remainingFilter);
auto plan = PlanBuilder()
.startTableScan()
.outputType(outputType)
Expand Down
2 changes: 1 addition & 1 deletion velox/exec/tests/utils/HiveConnectorTestBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class HiveConnectorTestBase : public OperatorTestBase {
infoColumns = {});

static std::shared_ptr<connector::hive::HiveTableHandle> makeTableHandle(
common::test::SubfieldFilters subfieldFilters = {},
common::SubfieldFilters subfieldFilters = {},
const core::TypedExprPtr& remainingFilter = nullptr,
const std::string& tableName = "hive_table",
const RowTypePtr& dataColumns = nullptr,
Expand Down
2 changes: 1 addition & 1 deletion velox/exec/tests/utils/PlanBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ core::PlanNodePtr PlanBuilder::TableScanBuilder::build(core::PlanNodeId id) {

const RowTypePtr& parseType = dataColumns_ ? dataColumns_ : outputType_;

SubfieldFilters filters;
common::SubfieldFilters filters;
filters.reserve(subfieldFilters_.size());
auto queryCtx = core::QueryCtx::create();
exec::SimpleExpressionEvaluator evaluator(queryCtx.get(), planBuilder_.pool_);
Expand Down
10 changes: 5 additions & 5 deletions velox/substrait/SubstraitToVeloxPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -412,11 +412,11 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan(
kHiveConnectorId,
"hive_table",
filterPushdownEnabled,
connector::hive::SubfieldFilters{},
common::SubfieldFilters{},
nullptr,
nullptr);
} else {
connector::hive::SubfieldFilters filters =
common::SubfieldFilters filters =
toVeloxFilter(colNameList, veloxTypeList, readRel.filter());
tableHandle = std::make_shared<connector::hive::HiveTableHandle>(
kHiveConnectorId,
Expand Down Expand Up @@ -633,12 +633,12 @@ class FilterInfo {
bool isInitialized_ = false;
};

connector::hive::SubfieldFilters SubstraitVeloxPlanConverter::toVeloxFilter(
common::SubfieldFilters SubstraitVeloxPlanConverter::toVeloxFilter(
const std::vector<std::string>& inputNameList,
const std::vector<TypePtr>& inputTypeList,
const ::substrait::Expression& substraitFilter) {
connector::hive::SubfieldFilters filters;
// A map between the column index and the FilterInfo for that column.
common::SubfieldFilters filters;
// A map betweesn the column index and the FilterInfo for that column.
std::unordered_map<int, std::shared_ptr<FilterInfo>> colInfoMap;
for (int idx = 0; idx < inputNameList.size(); idx++) {
colInfoMap[idx] = std::make_shared<FilterInfo>();
Expand Down
2 changes: 1 addition & 1 deletion velox/substrait/SubstraitToVeloxPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class SubstraitVeloxPlanConverter {

/// Used to convert Substrait Filter into Velox SubfieldFilters which will
/// be used in TableScan.
connector::hive::SubfieldFilters toVeloxFilter(
common::SubfieldFilters toVeloxFilter(
const std::vector<std::string>& inputNameList,
const std::vector<TypePtr>& inputTypeList,
const ::substrait::Expression& substraitFilter);
Expand Down
3 changes: 3 additions & 0 deletions velox/type/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "velox/common/base/SimdUtil.h"
#include "velox/common/serialization/Serializable.h"
#include "velox/type/StringView.h"
#include "velox/type/Subfield.h"
#include "velox/type/Type.h"

namespace facebook::velox::common {
Expand Down Expand Up @@ -61,6 +62,8 @@ enum class FilterKind {
class Filter;
using FilterPtr = std::unique_ptr<Filter>;

using SubfieldFilters = std::unordered_map<Subfield, std::unique_ptr<Filter>>;

/**
* A simple filter (e.g. comparison with literal) that can be applied
* efficiently while extracting values from an ORC stream.
Expand Down
3 changes: 0 additions & 3 deletions velox/type/tests/SubfieldFiltersBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@

namespace facebook::velox::common::test {

using SubfieldFilters =
std::unordered_map<common::Subfield, std::unique_ptr<common::Filter>>;

class SubfieldFiltersBuilder {
public:
SubfieldFiltersBuilder& add(
Expand Down

0 comments on commit ce273fa

Please sign in to comment.