Skip to content

Commit

Permalink
Throw user error for invalid time zone in make_timestamp Spark functi…
Browse files Browse the repository at this point in the history
…on (#9631)

Summary:
getTimeZoneID fails with VELOX_FAIL on unknown time zone, which causes
unexpected failure in the fuzzer test. This PR changes getTimeZoneID to
throw user error.

Pull Request resolved: #9631

Reviewed By: pedroerp

Differential Revision: D56710785

Pulled By: kagamiori

fbshipit-source-id: 629038e5664a4bbfc06119bc533bb5c17ff02e1e
  • Loading branch information
rui-mo authored and facebook-github-bot committed Apr 29, 2024
1 parent 9370245 commit e931e76
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 5 deletions.
14 changes: 10 additions & 4 deletions velox/functions/sparksql/MakeTimestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ void setTimestampOrNull(
std::optional<Timestamp> timestamp,
DecodedVector* timeZoneVector,
FlatVector<Timestamp>* result) {
const auto timeZone = timeZoneVector->valueAt<StringView>(row);
const auto tzID = util::getTimeZoneID(std::string_view(timeZone));
if (timestamp.has_value()) {
auto timeZone = timeZoneVector->valueAt<StringView>(row);
auto tzID = util::getTimeZoneID(std::string_view(timeZone));
(*timestamp).toGMT(tzID);
result->set(row, *timestamp);
} else {
Expand Down Expand Up @@ -122,15 +122,21 @@ class MakeTimestampFunction : public exec::VectorFunction {
if (args[6]->isConstantEncoding()) {
auto tz =
args[6]->asUnchecked<ConstantVector<StringView>>()->valueAt(0);
auto constantTzID = util::getTimeZoneID(std::string_view(tz));
int64_t constantTzID;
try {
constantTzID = util::getTimeZoneID(std::string_view(tz));
} catch (const VeloxException& e) {
context.setErrors(rows, std::current_exception());
return;
}
rows.applyToSelected([&](vector_size_t row) {
auto timestamp = makeTimeStampFromDecodedArgs(
row, year, month, day, hour, minute, micros);
setTimestampOrNull(row, timestamp, constantTzID, resultFlatVector);
});
} else {
auto* timeZone = decodedArgs.at(6);
rows.applyToSelected([&](vector_size_t row) {
context.applyToSelectedNoThrow(rows, [&](auto row) {
auto timestamp = makeTimeStampFromDecodedArgs(
row, year, month, day, hour, minute, micros);
setTimestampOrNull(row, timestamp, timeZone, resultFlatVector);
Expand Down
50 changes: 50 additions & 0 deletions velox/functions/sparksql/tests/MakeTimestampTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,5 +173,55 @@ TEST_F(MakeTimestampTest, errors) {
"make_timestamp requires session time zone to be set.");
}

TEST_F(MakeTimestampTest, invalidTimezone) {
const auto microsType = DECIMAL(16, 6);
const auto year = makeFlatVector<int32_t>({2021, 2021, 2021, 2021, 2021});
const auto month = makeFlatVector<int32_t>({7, 7, 7, 7, 7});
const auto day = makeFlatVector<int32_t>({11, 11, 11, 11, 11});
const auto hour = makeFlatVector<int32_t>({6, 6, 6, -6, 6});
const auto minute = makeFlatVector<int32_t>({30, 30, 30, 30, 30});
const auto micros = makeNullableFlatVector<int64_t>(
{45678000, 1e6, 6e7, 59999999, std::nullopt}, microsType);
auto data = makeRowVector({year, month, day, hour, minute, micros});

// Invalid time zone from query config.
setQueryTimeZone("Invalid");
VELOX_ASSERT_USER_THROW(
evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data),
"Unknown time zone: 'Invalid'");

setQueryTimeZone("");
VELOX_ASSERT_USER_THROW(
evaluate("make_timestamp(c0, c1, c2, c3, c4, c5)", data),
"make_timestamp requires session time zone to be set.");

// Invalid constant time zone.
setQueryTimeZone("GMT");
for (auto timeZone : {"Invalid", ""}) {
SCOPED_TRACE(fmt::format("timezone: {}", timeZone));
VELOX_ASSERT_USER_THROW(
evaluate(
fmt::format(
"make_timestamp(c0, c1, c2, c3, c4, c5, '{}')", timeZone),
data),
fmt::format("Unknown time zone: '{}'", timeZone));
}

// Invalid timezone from vector.
auto timeZones = makeFlatVector<StringView>(
{"GMT", "CET", "Asia/Shanghai", "Invalid", "GMT"});
data = makeRowVector({year, month, day, hour, minute, micros, timeZones});
VELOX_ASSERT_USER_THROW(
evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data),
"Unknown time zone: 'Invalid'");

timeZones =
makeFlatVector<StringView>({"GMT", "CET", "Asia/Shanghai", "", "GMT"});
data = makeRowVector({year, month, day, hour, minute, micros, timeZones});
VELOX_ASSERT_USER_THROW(
evaluate("make_timestamp(c0, c1, c2, c3, c4, c5, c6)", data),
"Unknown time zone: ''");
}

} // namespace
} // namespace facebook::velox::functions::sparksql::test
2 changes: 1 addition & 1 deletion velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ int64_t getTimeZoneID(std::string_view timeZone, bool failOnError) {
return it->second;
}
if (failOnError) {
VELOX_FAIL("Unknown time zone: '{}'", timeZone);
VELOX_USER_FAIL("Unknown time zone: '{}'", timeZone);
}
return -1;
}
Expand Down

0 comments on commit e931e76

Please sign in to comment.