Skip to content

Commit

Permalink
Support date-type partition filter (facebookincubator#9937)
Browse files Browse the repository at this point in the history
Summary:
When the partition column is of date type and a filter is applied to it, the
error below occurs because the date string is parsed as an integer. This PR fixes
the exception by parsing the value with castFromDateString in standard-cast mode.
```
C++ exception with description "Exception: VeloxRuntimeError
Error Source: RUNTIME
Error Code: INVALID_STATE
Reason: Operator::getOutput failed for [operator: TableScan, plan node ID: 0]: Non-whitespace character found after end of conversion: "-10-27"
```
facebookincubator#7084

Pull Request resolved: facebookincubator#9937

Reviewed By: pedroerp

Differential Revision: D57913463

Pulled By: Yuhta

fbshipit-source-id: 7956de4ae122b080ecf80a96cfcf546f8aae7efe
  • Loading branch information
rui-mo authored and Joe-Abraham committed Jun 7, 2024
1 parent c72e4a4 commit d5b0d8e
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 5 deletions.
17 changes: 13 additions & 4 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "velox/dwio/common/Reader.h"
#include "velox/expression/Expr.h"
#include "velox/expression/ExprToSubfieldFilter.h"
#include "velox/type/TimestampConversion.h"

namespace facebook::velox::connector::hive {

Expand Down Expand Up @@ -577,10 +578,17 @@ void configureRowReaderOptions(
namespace {

bool applyPartitionFilter(
TypeKind kind,
const TypePtr& type,
const std::string& partitionValue,
common::Filter* filter) {
switch (kind) {
if (type->isDate()) {
const auto result = util::castFromDateString(
StringView(partitionValue), util::ParseMode::kStandardCast);
VELOX_CHECK(!result.hasError());
return applyFilter(*filter, result.value());
}

switch (type->kind()) {
case TypeKind::BIGINT:
case TypeKind::INTEGER:
case TypeKind::SMALLINT:
Expand All @@ -598,7 +606,8 @@ bool applyPartitionFilter(
return applyFilter(*filter, partitionValue);
}
default:
VELOX_FAIL("Bad type {} for partition value: {}", kind, partitionValue);
VELOX_FAIL(
"Bad type {} for partition value: {}", type->kind(), partitionValue);
}
}

Expand Down Expand Up @@ -629,7 +638,7 @@ bool testFilters(

// This is a non-null partition key
return applyPartitionFilter(
handlesIter->second->dataType()->kind(),
handlesIter->second->dataType(),
iter->second.value(),
child->filter());
}
Expand Down
32 changes: 31 additions & 1 deletion velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1753,7 +1753,37 @@ TEST_F(TableScanTest, partitionedTableDateKey) {
auto filePath = TempFilePath::create();
writeToFile(filePath->getPath(), vectors);
createDuckDbTable(vectors);
testPartitionedTable(filePath->getPath(), DATE(), "2023-10-27");
const std::string partitionValue = "2023-10-27";
testPartitionedTable(filePath->getPath(), DATE(), partitionValue);

// Test partition filter on date column.
{
auto split = HiveConnectorSplitBuilder(filePath->getPath())
.partitionKey("pkey", partitionValue)
.build();
auto outputType = ROW({"pkey", "c0", "c1"}, {DATE(), BIGINT(), DOUBLE()});
ColumnHandleMap assignments = {
{"pkey", partitionKey("pkey", DATE())},
{"c0", regularColumn("c0", BIGINT())},
{"c1", regularColumn("c1", DOUBLE())}};

SubfieldFilters filters;
// pkey >= 2020-09-01 (18506 days since epoch; the range's lower bound is inclusive).
filters[common::Subfield("pkey")] = std::make_unique<common::BigintRange>(
18506, std::numeric_limits<int64_t>::max(), false);

auto tableHandle = std::make_shared<HiveTableHandle>(
"test-hive", "hive_table", true, std::move(filters), nullptr, nullptr);
auto op = std::make_shared<TableScanNode>(
"0",
std::move(outputType),
std::move(tableHandle),
std::move(assignments));

std::string partitionValueStr = "'" + partitionValue + "'";
assertQuery(
op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr));
}
}

std::vector<StringView> toStringViews(const std::vector<std::string>& values) {
Expand Down

0 comments on commit d5b0d8e

Please sign in to comment.