From c35ca80ca2174dc894dfafc0912131c740183071 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Thu, 24 Oct 2024 14:32:42 -0700 Subject: [PATCH] Fix Presto's format_datetime function with time zone (#11283) Summary: The format_datetime currently outptus the time zone id if for 3 or fewer 'z' characters in the format string. However, the JODA library, which this is based on, does this for 3 or more 'Z' characters. This diff fixes this, as well as adds support for a single 'Z' (which outputs the same thing as 'ZZ' just without the colon). So 'Z' is fully supported for any number of characters. To be more explicit: https://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html From the JODA docs: `'Z' outputs offset without a colon (-0800), 'ZZ' outputs the offset with a colon(-08:00), 'ZZZ' or more outputs the zone id(America/Los_Angeles).` And not clearly explained in the docs, but from experimentation: `'z', 'zz', or 'zzz' outputs the abbreviation of the time zone(PDT), and 'zzzz' or more outputs the time zone name(Pacific Daylight Time)` Currently DateTimeFormatter treats 'zzzz' or more like JODA treats 'ZZZ' or more. This diff marks 'zzzz' or more as unsupported (we can implement that in a future change), and moves that logic under 'ZZZ' or more to be consistent. It also implements 'Z' (previously only 'ZZ' was implemented in DateTimeFormatter). Reviewed By: bikramSingh91 Differential Revision: D64500193 --- velox/functions/lib/DateTimeFormatter.cpp | 63 ++++++++++--------- .../prestosql/tests/DateTimeFunctionsTest.cpp | 29 ++++++--- .../types/TimestampWithTimeZoneType.cpp | 2 +- 3 files changed, 53 insertions(+), 41 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index dc58e6d527aee..f141fdd9371ff 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -513,7 +513,7 @@ std::string formatFractionOfSecond( return toAdd; } -int32_t appendTimezoneOffset(int64_t offset, char* result) { +int32_t appendTimezoneOffset(int64_t offset, char* result, bool includeColon) { int pos = 0; if (offset >= 0) { result[pos++] = '+'; @@ -531,7 +531,9 @@ int32_t appendTimezoneOffset(int64_t offset, char* result) { result[pos++] = char(hours % 10 + '0'); } - result[pos++] = ':'; + if (includeColon) { + result[pos++] = ':'; + } const auto minutes = (offset / 60) % 60; if LIKELY (minutes == 0) { @@ -1036,20 +1038,26 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { size += std::max((int)token.pattern.minRepresentDigits, 9); break; case DateTimeFormatSpecifier::TIMEZONE: - if (timezone == nullptr) { - VELOX_USER_FAIL("Timezone unknown"); - } - size += std::max( - token.pattern.minRepresentDigits, timezone->name().length()); + VELOX_NYI( + "Date format specifier is not yet implemented: {} ({})", + getSpecifierName(token.pattern.specifier), + token.pattern.minRepresentDigits); + break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: - if (token.pattern.minRepresentDigits != 2) { - VELOX_UNSUPPORTED( - "Date format specifier is not supported: {} ({})", - getSpecifierName(token.pattern.specifier), - token.pattern.minRepresentDigits); + if (token.pattern.minRepresentDigits == 1) { + // 'Z' means output the time zone offset without a colon. + size += 8; + } else if (token.pattern.minRepresentDigits == 2) { + // 'ZZ' means output the time zone offset with a colon. + size += 9; + } else { + // 'ZZZ' (or more) means otuput the time zone ID. + if (timezone == nullptr) { + VELOX_USER_FAIL("Timezone unknown"); + } + size += timezone->name().length(); } - size += 9; break; // Not supported. case DateTimeFormatSpecifier::WEEK_YEAR: @@ -1280,35 +1288,28 @@ int32_t DateTimeFormatter::format( case DateTimeFormatSpecifier::TIMEZONE: { // TODO: implement short name time zone, need a map from full name to // short name - if (token.pattern.minRepresentDigits <= 3) { - VELOX_UNSUPPORTED("short name time zone is not yet supported"); - } - if (timezone == nullptr) { - VELOX_USER_FAIL("Timezone unknown"); - } - const auto& piece = timezone->name(); - std::memcpy(result, piece.data(), piece.length()); - result += piece.length(); + VELOX_UNSUPPORTED("time zone name is not yet supported"); } break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: { // Zone: 'Z' outputs offset without a colon, 'ZZ' outputs the offset // with a colon, 'ZZZ' or more outputs the zone id. - // TODO Add support for 'Z' and 'ZZZ'. - if (token.pattern.minRepresentDigits != 2) { - VELOX_UNSUPPORTED( - "format is not supported for specifier {} ({})", - getSpecifierName(token.pattern.specifier), - token.pattern.minRepresentDigits); - } - if (offset == 0 && zeroOffsetText.has_value()) { std::memcpy(result, zeroOffsetText->data(), zeroOffsetText->size()); result += zeroOffsetText->size(); break; } - result += appendTimezoneOffset(offset, result); + if (token.pattern.minRepresentDigits >= 3) { + // Append the time zone ID. + const auto& piece = timezone->name(); + std::memcpy(result, piece.data(), piece.length()); + result += piece.length(); + break; + } + + result += appendTimezoneOffset( + offset, result, token.pattern.minRepresentDigits == 2); break; } case DateTimeFormatSpecifier::WEEK_OF_WEEK_YEAR: { diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 16ae8a77b4143..e9d7c378eb106 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3214,11 +3214,18 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { "0010", formatDatetime(parseTimestamp("2022-01-01 03:30:30.001"), "SSSS")); - // Time zone test cases - 'z' + // Time zone test cases - 'Z' setQueryTimeZone("Asia/Kolkata"); EXPECT_EQ( - "Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "zzzz")); + "Asia/Kolkata", + formatDatetime( + parseTimestamp("1970-01-01"), "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ")); + EXPECT_EQ( + "Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "ZZZZ")); + EXPECT_EQ( + "Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "ZZZ")); EXPECT_EQ("+05:30", formatDatetime(parseTimestamp("1970-01-01"), "ZZ")); + EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-01-01"), "Z")); // Literal test cases. EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'")); @@ -3243,12 +3250,12 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { "AD 19 1970 4 Thu 1970 1 1 1 AM 8 8 8 8 3 11 5 Asia/Kolkata", formatDatetime( parseTimestamp("1970-01-01 02:33:11.5"), - "G C Y e E y D M d a K h H k m s S zzzz")); + "G C Y e E y D M d a K h H k m s S ZZZ")); EXPECT_EQ( "AD 19 1970 4 asdfghjklzxcvbnmqwertyuiop Thu ' 1970 1 1 1 AM 8 8 8 8 3 11 5 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ Asia/Kolkata", formatDatetime( parseTimestamp("1970-01-01 02:33:11.5"), - "G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ zzzz")); + "G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ ZZZ")); disableAdjustTimestampToTimezone(); EXPECT_EQ( @@ -3260,15 +3267,19 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_THROW( formatDatetime(parseTimestamp("1970-01-01"), "x"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxUserError); + formatDatetime(parseTimestamp("1970-01-01"), "q"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxUserError); + formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError); + + // System errors for patterns we haven't implemented yet. EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxUserError); + formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxRuntimeError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "q"), VeloxUserError); + formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxRuntimeError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError); + formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxRuntimeError); + EXPECT_THROW( + formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); } TEST_F(DateTimeFunctionsTest, formatDateTimeTimezone) { diff --git a/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp b/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp index f1865e55830e2..de833dd77f2df 100644 --- a/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp +++ b/velox/functions/prestosql/types/TimestampWithTimeZoneType.cpp @@ -110,7 +110,7 @@ void castToString( const auto* timestamps = input.as>(); auto expectedFormatter = - functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS zzzz"); + functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS ZZZ"); VELOX_CHECK( !expectedFormatter.hasError(), "Default format should always be valid, error: {}",