From 0e5f3b0baaa3b62387bbe917f19ef8caf72588c1 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Mon, 21 Oct 2024 14:01:26 -0700 Subject: [PATCH] Fix to_iso8601 to use Z for UTC (#11279) Summary: Presto's to_iso8601 UDF uses JODA's default ISODateTimeFormat.dateTime() formatter which uses the string 'Z' in place of the time zone offset if the time zone is UTC. https://www.joda.org/joda-time/apidocs/org/joda/time/format/ISODateTimeFormat.html#dateTime-- Internally JODA's DateTimeFormatter does this by taking an optional hard coded string to use for the time zone offset if it's zero. I added something similar to Velox's DateTimeFormatter.format and setting it in the to_iso8601 implementation. I checked and other Presto UDFs that format TimestampWithTimeZones as strings, e.g. format_datetime, do not use this option, so they do not need to be updated. Reviewed By: bikramSingh91 Differential Revision: D64488962 --- velox/functions/lib/DateTimeFormatter.cpp | 10 +++++++++- velox/functions/lib/DateTimeFormatter.h | 3 ++- velox/functions/prestosql/DateTimeFunctions.h | 4 ++-- .../prestosql/tests/DateTimeFunctionsTest.cpp | 14 +++++++++----- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 86f40b351948c..dc58e6d527aee 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1067,7 +1067,8 @@ int32_t DateTimeFormatter::format( const tz::TimeZone* timezone, const uint32_t maxResultSize, char* result, - bool allowOverflow) const { + bool allowOverflow, + const std::optional& zeroOffsetText) const { int64_t offset = 0; Timestamp t = timestamp; if (timezone != nullptr) { @@ -1300,6 +1301,13 @@ int32_t DateTimeFormatter::format( getSpecifierName(token.pattern.specifier), token.pattern.minRepresentDigits); } + + if (offset == 0 && zeroOffsetText.has_value()) { + std::memcpy(result, zeroOffsetText->data(), zeroOffsetText->size()); + result += zeroOffsetText->size(); + break; + } + result += appendTimezoneOffset(offset, result); break; } diff --git a/velox/functions/lib/DateTimeFormatter.h b/velox/functions/lib/DateTimeFormatter.h index 7a0dc68f8b611..f11894cebd2b6 100644 --- a/velox/functions/lib/DateTimeFormatter.h +++ b/velox/functions/lib/DateTimeFormatter.h @@ -206,7 +206,8 @@ class DateTimeFormatter { const tz::TimeZone* timezone, const uint32_t maxResultSize, char* result, - bool allowOverflow = false) const; + bool allowOverflow = false, + const std::optional& zeroOffsetText = std::nullopt) const; private: std::unique_ptr literalBuf_; diff --git a/velox/functions/prestosql/DateTimeFunctions.h b/velox/functions/prestosql/DateTimeFunctions.h index 877514dac364d..5241af10b0cdb 100644 --- a/velox/functions/prestosql/DateTimeFunctions.h +++ b/velox/functions/prestosql/DateTimeFunctions.h @@ -1798,8 +1798,8 @@ struct ToISO8601Function { out_type& result) const { const auto maxResultSize = formatter_->maxResultSize(timeZone); result.reserve(maxResultSize); - const auto resultSize = - formatter_->format(timestamp, timeZone, maxResultSize, result.data()); + const auto resultSize = formatter_->format( + timestamp, timeZone, maxResultSize, result.data(), false, "Z"); result.resize(resultSize); } diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index d7b4165c33c6e..16ae8a77b4143 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -4512,11 +4512,11 @@ TEST_F(DateTimeFunctionsTest, toISO8601Timestamp) { "to_iso8601(c0)", std::make_optional(parseTimestamp(timestamp))); }; disableAdjustTimestampToTimezone(); - EXPECT_EQ("2024-11-01T10:00:00.000+00:00", toIso("2024-11-01 10:00")); - EXPECT_EQ("2024-11-04T10:00:00.000+00:00", toIso("2024-11-04 10:00")); - EXPECT_EQ("2024-11-04T15:05:34.100+00:00", toIso("2024-11-04 15:05:34.1")); - EXPECT_EQ("2024-11-04T15:05:34.123+00:00", toIso("2024-11-04 15:05:34.123")); - EXPECT_EQ("0022-11-01T10:00:00.000+00:00", toIso("22-11-01 10:00")); + EXPECT_EQ("2024-11-01T10:00:00.000Z", toIso("2024-11-01 10:00")); + EXPECT_EQ("2024-11-04T10:00:00.000Z", toIso("2024-11-04 10:00")); + EXPECT_EQ("2024-11-04T15:05:34.100Z", toIso("2024-11-04 15:05:34.1")); + EXPECT_EQ("2024-11-04T15:05:34.123Z", toIso("2024-11-04 15:05:34.123")); + EXPECT_EQ("0022-11-01T10:00:00.000Z", toIso("22-11-01 10:00")); setQueryTimeZone("America/Los_Angeles"); EXPECT_EQ("2024-11-01T03:00:00.000-07:00", toIso("2024-11-01 10:00")); @@ -4562,6 +4562,10 @@ TEST_F(DateTimeFunctionsTest, toISO8601TimestampWithTimezone) { EXPECT_EQ( "0022-11-01T10:00:00.000+05:41:16", toIso("22-11-01 10:00", "Asia/Kathmandu")); + + EXPECT_EQ("2024-11-01T10:00:00.000Z", toIso("2024-11-01 10:00", "UTC")); + EXPECT_EQ("2024-11-04T10:00:45.120Z", toIso("2024-11-04 10:00:45.12", "UTC")); + EXPECT_EQ("0022-11-01T10:00:00.000Z", toIso("22-11-01 10:00", "UTC")); } TEST_F(DateTimeFunctionsTest, atTimezoneTest) {