Skip to content

Commit

Permalink
Handle UNKNOWN type input in approx_distinct
Browse files Browse the repository at this point in the history
Reviewed By: amitkdutta

Differential Revision: D65100134
  • Loading branch information
Yuhta authored and facebook-github-bot committed Oct 28, 2024
1 parent 27d0527 commit bd856b5
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 13 deletions.
32 changes: 19 additions & 13 deletions velox/functions/prestosql/aggregates/ApproxDistinctAggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,19 +453,21 @@ exec::AggregateRegistrationResult registerApproxDistinct(
.argumentType("hyperloglog")
.build());
} else {
for (const auto& inputType :
{"boolean",
"tinyint",
"smallint",
"integer",
"bigint",
"hugeint",
"real",
"double",
"varchar",
"varbinary",
"timestamp",
"date"}) {
for (const auto& inputType : {
"boolean",
"tinyint",
"smallint",
"integer",
"bigint",
"hugeint",
"real",
"double",
"varchar",
"varbinary",
"timestamp",
"date",
"unknown",
}) {
signatures.push_back(exec::AggregateFunctionSignatureBuilder()
.returnType(returnType)
.intermediateType("varbinary")
Expand Down Expand Up @@ -505,6 +507,10 @@ exec::AggregateRegistrationResult registerApproxDistinct(
const TypePtr& resultType,
const core::QueryConfig& /*config*/)
-> std::unique_ptr<exec::Aggregate> {
if (argTypes[0]->isUnKnown()) {
return std::make_unique<ApproxDistinctAggregate<UnknownValue>>(
resultType, hllAsFinalResult, hllAsRawInput, defaultError);
}
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(
createApproxDistinct,
argTypes[0]->kind(),
Expand Down
39 changes: 39 additions & 0 deletions velox/functions/prestosql/aggregates/tests/ApproxDistinctTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,5 +432,44 @@ TEST_F(ApproxDistinctTest, toIntermediate) {
digests, {"c0"}, {"merge(a0)"}, {"c0", "cardinality(a0)"}, {input});
}

// Exercises approx_distinct and approx_set on an input column of UNKNOWN type
// in which every row is null, for both global and grouped aggregation, with
// and without the explicit error-bound argument.
TEST_F(ApproxDistinctTest, unknownType) {
  constexpr int kNumRows = 10;

  // Aggregate calls under test: default error bound and explicit error bound.
  const std::vector<std::string> distinctCalls{
      "approx_distinct(c1)", "approx_distinct(c1, 0.023)"};
  const std::vector<std::string> setCalls{
      "approx_set(c1)", "approx_set(c1, 0.01625)"};
  const std::vector<std::string> noKeys{};
  const std::vector<std::string> groupByC0{"c0"};

  // c0 alternates between 0 and 1 (used as the grouping key below); c1 is an
  // all-null vector of UnknownValue.
  auto data = makeRowVector({
      makeFlatVector<int32_t>(kNumRows, [](auto row) { return row % 2; }),
      makeAllNullFlatVector<UnknownValue>(kNumRows),
  });

  // Global aggregation: both approx_distinct variants are expected to yield 0.
  auto globalDistinctExpected =
      makeRowVector(std::vector<VectorPtr>(2, makeConstant<int64_t>(0, 1)));
  testAggregations({data}, noKeys, distinctCalls, {globalDistinctExpected});

  // Global aggregation: cardinality() over both approx_set results is expected
  // to be a null BIGINT.
  const std::vector<std::string> globalCardinality{
      "cardinality(a0)", "cardinality(a1)"};
  auto globalSetExpected = makeRowVector(
      std::vector<VectorPtr>(2, makeNullConstant(TypeKind::BIGINT, 1)));
  testAggregations(
      {data}, noKeys, setCalls, globalCardinality, {globalSetExpected});

  // Grouped by c0: one output row per key (0 and 1), each with a count of 0.
  auto groupedDistinctExpected = makeRowVector({
      makeFlatVector<int32_t>({0, 1}),
      makeFlatVector<int64_t>({0, 0}),
      makeFlatVector<int64_t>({0, 0}),
  });
  testAggregations({data}, groupByC0, distinctCalls, {groupedDistinctExpected});

  // Grouped by c0: cardinality() of each per-group approx_set is expected to
  // be null.
  const std::vector<std::string> groupedCardinality{
      "c0", "cardinality(a0)", "cardinality(a1)"};
  auto groupedSetExpected = makeRowVector({
      makeFlatVector<int32_t>({0, 1}),
      makeNullConstant(TypeKind::BIGINT, 2),
      makeNullConstant(TypeKind::BIGINT, 2),
  });
  testAggregations(
      {data}, groupByC0, setCalls, groupedCardinality, {groupedSetExpected});
}

} // namespace
} // namespace facebook::velox::aggregate::test

0 comments on commit bd856b5

Please sign in to comment.