diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index 1d47b0f5a75a..2525064cee85 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -288,7 +288,7 @@ void checkColumnNameLowerCase(const std::shared_ptr& type) { } void checkColumnNameLowerCase( - const SubfieldFilters& filters, + const common::SubfieldFilters& filters, const std::unordered_map>& infoColumns) { for (const auto& filterIt : filters) { @@ -349,7 +349,7 @@ std::shared_ptr makeScanSpec( const RowTypePtr& rowType, const folly::F14FastMap>& outputSubfields, - const SubfieldFilters& filters, + const common::SubfieldFilters& filters, const RowTypePtr& dataColumns, const std::unordered_map>& partitionKeys, @@ -837,7 +837,7 @@ core::TypedExprPtr extractFiltersFromRemainingFilter( const core::TypedExprPtr& expr, core::ExpressionEvaluator* evaluator, bool negated, - SubfieldFilters& filters, + common::SubfieldFilters& filters, double& sampleRate) { auto* call = dynamic_cast(expr.get()); if (call == nullptr) { diff --git a/velox/connectors/hive/HiveConnectorUtil.h b/velox/connectors/hive/HiveConnectorUtil.h index d28434feed01..8a68a69db4bd 100644 --- a/velox/connectors/hive/HiveConnectorUtil.h +++ b/velox/connectors/hive/HiveConnectorUtil.h @@ -30,15 +30,17 @@ class HiveTableHandle; class HiveConfig; struct HiveConnectorSplit; +#ifdef VELOX_ENABLE_BACKWARD_COMPATIBILITY using SubfieldFilters = std::unordered_map>; +#endif const std::string& getColumnName(const common::Subfield& subfield); void checkColumnNameLowerCase(const std::shared_ptr& type); void checkColumnNameLowerCase( - const SubfieldFilters& filters, + const common::SubfieldFilters& filters, const std::unordered_map>& infoColumns); @@ -53,7 +55,7 @@ std::shared_ptr makeScanSpec( const RowTypePtr& rowType, const folly::F14FastMap>& outputSubfields, - const SubfieldFilters& filters, + const common::SubfieldFilters& filters, const RowTypePtr& 
dataColumns, const std::unordered_map>& partitionKeys, @@ -108,7 +110,7 @@ core::TypedExprPtr extractFiltersFromRemainingFilter( const core::TypedExprPtr& expr, core::ExpressionEvaluator* evaluator, bool negated, - SubfieldFilters& filters, + common::SubfieldFilters& filters, double& sampleRate); } // namespace facebook::velox::connector::hive diff --git a/velox/connectors/hive/HiveDataSource.h b/velox/connectors/hive/HiveDataSource.h index a870966603ca..1a0a99fb11ad 100644 --- a/velox/connectors/hive/HiveDataSource.h +++ b/velox/connectors/hive/HiveDataSource.h @@ -32,9 +32,6 @@ namespace facebook::velox::connector::hive { class HiveConfig; -using SubfieldFilters = - std::unordered_map>; - class HiveDataSource : public DataSource { public: HiveDataSource( @@ -162,7 +159,7 @@ class HiveDataSource : public DataSource { SpecialColumnNames specialColumns_{}; folly::F14FastMap> subfields_; - SubfieldFilters filters_; + common::SubfieldFilters filters_; std::shared_ptr metadataFilter_; std::unique_ptr remainingFilterExprSet_; RowVectorPtr emptyOutput_; diff --git a/velox/connectors/hive/TableHandle.cpp b/velox/connectors/hive/TableHandle.cpp index a96b5e2e59b6..9e03adf47022 100644 --- a/velox/connectors/hive/TableHandle.cpp +++ b/velox/connectors/hive/TableHandle.cpp @@ -107,7 +107,7 @@ HiveTableHandle::HiveTableHandle( std::string connectorId, const std::string& tableName, bool filterPushdownEnabled, - SubfieldFilters subfieldFilters, + common::SubfieldFilters subfieldFilters, const core::TypedExprPtr& remainingFilter, const RowTypePtr& dataColumns, const std::unordered_map& tableParameters) @@ -185,7 +185,7 @@ ConnectorTableHandlePtr HiveTableHandle::create( ISerializable::deserialize(it->second, context); } - SubfieldFilters subfieldFilters; + common::SubfieldFilters subfieldFilters; folly::dynamic subfieldFiltersObj = obj["subfieldFilters"]; for (const auto& subfieldFilter : subfieldFiltersObj) { common::Subfield subfield(subfieldFilter["subfield"].asString()); diff 
--git a/velox/connectors/hive/TableHandle.h b/velox/connectors/hive/TableHandle.h index 456f50c2e306..14916f51a734 100644 --- a/velox/connectors/hive/TableHandle.h +++ b/velox/connectors/hive/TableHandle.h @@ -23,9 +23,6 @@ namespace facebook::velox::connector::hive { -using SubfieldFilters = - std::unordered_map>; - class HiveColumnHandle : public ColumnHandle { public: enum class ColumnType { @@ -126,7 +123,7 @@ class HiveTableHandle : public ConnectorTableHandle { std::string connectorId, const std::string& tableName, bool filterPushdownEnabled, - SubfieldFilters subfieldFilters, + common::SubfieldFilters subfieldFilters, const core::TypedExprPtr& remainingFilter, const RowTypePtr& dataColumns = nullptr, const std::unordered_map& tableParameters = {}); @@ -143,7 +140,7 @@ class HiveTableHandle : public ConnectorTableHandle { return filterPushdownEnabled_; } - const SubfieldFilters& subfieldFilters() const { + const common::SubfieldFilters& subfieldFilters() const { return subfieldFilters_; } @@ -173,7 +170,7 @@ class HiveTableHandle : public ConnectorTableHandle { private: const std::string tableName_; const bool filterPushdownEnabled_; - const SubfieldFilters subfieldFilters_; + const common::SubfieldFilters subfieldFilters_; const core::TypedExprPtr remainingFilter_; const RowTypePtr dataColumns_; const std::unordered_map tableParameters_; diff --git a/velox/connectors/hive/iceberg/PositionalDeleteFileReader.h b/velox/connectors/hive/iceberg/PositionalDeleteFileReader.h index ba98845eb639..c534613c93b8 100644 --- a/velox/connectors/hive/iceberg/PositionalDeleteFileReader.h +++ b/velox/connectors/hive/iceberg/PositionalDeleteFileReader.h @@ -30,9 +30,6 @@ namespace facebook::velox::connector::hive::iceberg { struct IcebergDeleteFile; struct IcebergMetadataColumn; -using SubfieldFilters = - std::unordered_map>; - class PositionalDeleteFileReader { public: PositionalDeleteFileReader( diff --git a/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp 
b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp index b2307483e146..ba5daaf1cf2e 100644 --- a/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp +++ b/velox/connectors/hive/tests/HiveConnectorUtilTest.cpp @@ -79,7 +79,7 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) { "testConnectorId", "testTable", false, - hive::SubfieldFilters{}, + common::SubfieldFilters{}, nullptr, nullptr, tableParameters); @@ -305,7 +305,7 @@ TEST_F(HiveConnectorUtilTest, cacheRetention) { "testConnectorId", "testTable", false, - hive::SubfieldFilters{}, + common::SubfieldFilters{}, nullptr, nullptr, std::unordered_map{}); diff --git a/velox/dwio/common/tests/utils/FilterGenerator.h b/velox/dwio/common/tests/utils/FilterGenerator.h index c0fdad28fa52..42368d20ffbe 100644 --- a/velox/dwio/common/tests/utils/FilterGenerator.h +++ b/velox/dwio/common/tests/utils/FilterGenerator.h @@ -32,8 +32,6 @@ namespace facebook::velox::dwio::common { using namespace facebook::velox::common; -using SubfieldFilters = std::unordered_map>; - struct FilterSpec { FilterSpec() {} diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index 85c5f1e139e2..4fac546ec122 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -1242,7 +1242,7 @@ TEST_F(TableScanTest, missingColumns) { assertQuery(op, filePaths, "SELECT count(*) FROM tmp WHERE c1 <= 4000.1", 0); // Use missing column 'c1' in 'is null' filter, while not selecting 'c1'. - SubfieldFilters filters; + common::SubfieldFilters filters; filters[common::Subfield("c1")] = lessThanOrEqualDouble(1050.0, true); auto tableHandle = std::make_shared( kHiveConnectorId, "tmp", true, std::move(filters), nullptr, dataColumns); @@ -1975,7 +1975,7 @@ TEST_F(TableScanTest, partitionedTableDateKey) { {"c0", regularColumn("c0", BIGINT())}, {"c1", regularColumn("c1", DOUBLE())}}; - SubfieldFilters filters; + common::SubfieldFilters filters; // pkey > 2020-09-01. 
filters[common::Subfield("pkey")] = std::make_unique( 18506, std::numeric_limits::max(), false); @@ -2015,7 +2015,7 @@ TEST_F(TableScanTest, partitionedTableTimestampKey) { {"c0", regularColumn("c0", BIGINT())}, {"c1", regularColumn("c1", DOUBLE())}}; - SubfieldFilters filters; + common::SubfieldFilters filters; // pkey = 2023-10-27 00:12:35. auto lower = util::fromTimestampString( StringView("2023-10-27 00:12:35"), @@ -2709,7 +2709,7 @@ TEST_F(TableScanTest, filterPushdown) { createDuckDbTable(vectors); // c1 >= 0 or null and c3 is true - SubfieldFilters subfieldFilters = + common::SubfieldFilters subfieldFilters = SubfieldFiltersBuilder() .add("c1", greaterThanOrEqual(0, true)) .add("c3", std::make_unique(true, false)) @@ -2805,7 +2805,7 @@ TEST_F(TableScanTest, path) { // use $path in a filter, but don't project it out auto tableHandle = makeTableHandle( - SubfieldFilters{}, + common::SubfieldFilters{}, parseExpr(fmt::format("\"{}\" = '{}'", kPath, pathValue), typeWithPath)); op = PlanBuilder() .startTableScan() @@ -2862,7 +2862,7 @@ TEST_F(TableScanTest, fileSizeAndModifiedTime) { auto filterTest = [&](const std::string& filter) { auto tableHandle = makeTableHandle( - SubfieldFilters{}, + common::SubfieldFilters{}, parseExpr(filter, allColumns), "hive_table", allColumns); @@ -5280,7 +5280,8 @@ TEST_F(TableScanTest, rowNumberInRemainingFilter) { writeToFile(file->getPath(), {vector}); auto outputType = ROW({"c0"}, {BIGINT()}); auto remainingFilter = parseExpr("r1 % 2 == 0", ROW({"r1"}, {BIGINT()})); - auto tableHandle = makeTableHandle(SubfieldFilters{}, remainingFilter); + auto tableHandle = + makeTableHandle(common::SubfieldFilters{}, remainingFilter); auto plan = PlanBuilder() .startTableScan() .outputType(outputType) diff --git a/velox/exec/tests/utils/HiveConnectorTestBase.h b/velox/exec/tests/utils/HiveConnectorTestBase.h index 9104df777f00..98ca4b803757 100644 --- a/velox/exec/tests/utils/HiveConnectorTestBase.h +++ 
b/velox/exec/tests/utils/HiveConnectorTestBase.h @@ -116,7 +116,7 @@ class HiveConnectorTestBase : public OperatorTestBase { infoColumns = {}); static std::shared_ptr makeTableHandle( - common::test::SubfieldFilters subfieldFilters = {}, + common::SubfieldFilters subfieldFilters = {}, const core::TypedExprPtr& remainingFilter = nullptr, const std::string& tableName = "hive_table", const RowTypePtr& dataColumns = nullptr, diff --git a/velox/exec/tests/utils/PlanBuilder.cpp b/velox/exec/tests/utils/PlanBuilder.cpp index 478fa4c194ca..8739ae9cd965 100644 --- a/velox/exec/tests/utils/PlanBuilder.cpp +++ b/velox/exec/tests/utils/PlanBuilder.cpp @@ -182,7 +182,7 @@ core::PlanNodePtr PlanBuilder::TableScanBuilder::build(core::PlanNodeId id) { const RowTypePtr& parseType = dataColumns_ ? dataColumns_ : outputType_; - SubfieldFilters filters; + common::SubfieldFilters filters; filters.reserve(subfieldFilters_.size()); auto queryCtx = core::QueryCtx::create(); exec::SimpleExpressionEvaluator evaluator(queryCtx.get(), planBuilder_.pool_); diff --git a/velox/substrait/SubstraitToVeloxPlan.cpp b/velox/substrait/SubstraitToVeloxPlan.cpp index f13f4bc9bd82..0a8ae48a5932 100644 --- a/velox/substrait/SubstraitToVeloxPlan.cpp +++ b/velox/substrait/SubstraitToVeloxPlan.cpp @@ -412,11 +412,11 @@ core::PlanNodePtr SubstraitVeloxPlanConverter::toVeloxPlan( kHiveConnectorId, "hive_table", filterPushdownEnabled, - connector::hive::SubfieldFilters{}, + common::SubfieldFilters{}, nullptr, nullptr); } else { - connector::hive::SubfieldFilters filters = + common::SubfieldFilters filters = toVeloxFilter(colNameList, veloxTypeList, readRel.filter()); tableHandle = std::make_shared( kHiveConnectorId, @@ -633,12 +633,12 @@ class FilterInfo { bool isInitialized_ = false; }; -connector::hive::SubfieldFilters SubstraitVeloxPlanConverter::toVeloxFilter( +common::SubfieldFilters SubstraitVeloxPlanConverter::toVeloxFilter( const std::vector& inputNameList, const std::vector& inputTypeList, const 
::substrait::Expression& substraitFilter) { - connector::hive::SubfieldFilters filters; - // A map between the column index and the FilterInfo for that column. + common::SubfieldFilters filters; + // A map between the column index and the FilterInfo for that column. std::unordered_map> colInfoMap; for (int idx = 0; idx < inputNameList.size(); idx++) { colInfoMap[idx] = std::make_shared(); diff --git a/velox/substrait/SubstraitToVeloxPlan.h b/velox/substrait/SubstraitToVeloxPlan.h index c4a951c3d3e4..efdd16b012a9 100644 --- a/velox/substrait/SubstraitToVeloxPlan.h +++ b/velox/substrait/SubstraitToVeloxPlan.h @@ -125,7 +125,7 @@ class SubstraitVeloxPlanConverter { /// Used to convert Substrait Filter into Velox SubfieldFilters which will /// be used in TableScan. - connector::hive::SubfieldFilters toVeloxFilter( + common::SubfieldFilters toVeloxFilter( const std::vector& inputNameList, const std::vector& inputTypeList, const ::substrait::Expression& substraitFilter); diff --git a/velox/type/Filter.h b/velox/type/Filter.h index ac382a7f8379..f46b26c5f12a 100644 --- a/velox/type/Filter.h +++ b/velox/type/Filter.h @@ -29,6 +29,7 @@ #include "velox/common/base/SimdUtil.h" #include "velox/common/serialization/Serializable.h" #include "velox/type/StringView.h" +#include "velox/type/Subfield.h" #include "velox/type/Type.h" namespace facebook::velox::common { @@ -61,6 +62,8 @@ enum class FilterKind { class Filter; using FilterPtr = std::unique_ptr; +using SubfieldFilters = std::unordered_map>; + /** * A simple filter (e.g. comparison with literal) that can be applied * efficiently while extracting values from an ORC stream. 
diff --git a/velox/type/tests/SubfieldFiltersBuilder.h b/velox/type/tests/SubfieldFiltersBuilder.h index fa99b7353078..851b890a3509 100644 --- a/velox/type/tests/SubfieldFiltersBuilder.h +++ b/velox/type/tests/SubfieldFiltersBuilder.h @@ -19,9 +19,6 @@ namespace facebook::velox::common::test { -using SubfieldFilters = - std::unordered_map>; - class SubfieldFiltersBuilder { public: SubfieldFiltersBuilder& add(