Skip to content

Commit

Permalink
Fix find_first Presto function on empty arrays and invalid start index (
Browse files Browse the repository at this point in the history
facebookincubator#8030)

Summary:
Pull Request resolved: facebookincubator#8030

find_first(<empty array>,  0, <lambda>) should throw.

Fixes facebookincubator#8020

Reviewed By: xiaoxmeng

Differential Revision: D52140846

fbshipit-source-id: 8305ea28e1c874d07daf458c98b9855f87dd42cf
  • Loading branch information
mbasmanova authored and facebook-github-bot committed Dec 14, 2023
1 parent 30d0cb7 commit c6ad4bc
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
33 changes: 27 additions & 6 deletions velox/functions/prestosql/FindFirst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@
namespace facebook::velox::functions {
namespace {

void recordInvalidStartIndex(vector_size_t row, exec::EvalCtx& context) {
try {
VELOX_USER_FAIL("SQL array indices start at 1. Got 0.");
} catch (const VeloxUserError& exception) {
context.setVeloxExceptionError(row, std::current_exception());
}
}

class FindFirstFunctionBase : public exec::VectorFunction {
public:
bool isDefaultNullBehavior() const override {
Expand Down Expand Up @@ -175,11 +183,7 @@ class FindFirstFunctionBase : public exec::VectorFunction {
rawAdjustedOffsets[row] = 0;
rawAdjustedSizes[row] = 0;

try {
VELOX_USER_FAIL("SQL array indices start at 1. Got 0.");
} catch (const VeloxUserError& exception) {
context.setVeloxExceptionError(row, std::current_exception());
}
recordInvalidStartIndex(row, context);
} else if (start > 0) {
rawAdjustedOffsets[row] = offset + (start - 1);
rawAdjustedSizes[row] = size - (start - 1);
Expand Down Expand Up @@ -271,17 +275,34 @@ class FindFirstFunction : public FindFirstFunctionBase {
exec::EvalCtx& context,
VectorPtr& result) const override {
auto flatArray = prepareInputArray(args[0], rows, context);
auto startIndexVector = (args.size() == 3 ? args[1] : nullptr);

if (flatArray->elements()->size() == 0) {
// All arrays are NULL or empty.

if (startIndexVector != nullptr) {
// Check start indices for zeros.

exec::LocalDecodedVector startIndexDecoder(
context, *startIndexVector, rows);
rows.applyToSelected([&](auto row) {
if (flatArray->isNullAt(row) || startIndexDecoder->isNullAt(row)) {
return;
}
const auto start = startIndexDecoder->valueAt<int64_t>(row);
if (start == 0) {
recordInvalidStartIndex(row, context);
}
});
}

auto localResult = BaseVector::createNullConstant(
outputType, rows.end(), context.pool());
context.moveOrCopyResult(localResult, rows, result);
return;
}

auto* predicateVector = args.back()->asUnchecked<FunctionVector>();
auto startIndexVector = (args.size() == 3 ? args[1] : nullptr);

// Collect indices of the first matching elements or NULLs if no match or
// error.
Expand Down
21 changes: 21 additions & 0 deletions velox/functions/prestosql/tests/FindFirstTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,27 @@ TEST_F(FindFirstTest, invalidIndex) {
auto expected = makeNullableFlatVector<int32_t>(
{2, std::nullopt, std::nullopt, std::nullopt});
verify("find_first(c0, c1, x -> (x > 0))", data, expected);

// All null or empty arrays.
data = makeRowVector({
makeArrayVectorFromJson<int32_t>({
"[]",
"null",
"[]",
"null",
}),
makeFlatVector<int32_t>({2, 0, 0, 1}),
});

// Index 0 is not valid. Expect an error in the 3rd row.
VELOX_ASSERT_THROW(
evaluate("find_first(c0, c1, x -> (x > 0))", data),
"SQL array indices start at 1. Got 0.");

// Mark 3rd row null. Expect no error.
data->setNull(2, true);
expected = makeAllNullFlatVector<int32_t>(4);
verify("find_first(c0, c1, x -> (x > 0))", data, expected);
}

// Verify that null arrays with non-zero offsets/sizes are processed correctly.
Expand Down

0 comments on commit c6ad4bc

Please sign in to comment.