From d8b960c6c4e823b3c1ec427903271b55ff4f1a99 Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Thu, 24 Oct 2024 15:53:57 -0700 Subject: [PATCH] Add support for z, zz, zzz in format_datetime (#11323) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11323 This diff adds support for JODA's z, zz, zzz patterns (all equivalent) in Presto's forma_datetime function. This is used to format time zone abbreviations. Differential Revision: D64774281 --- velox/functions/lib/DateTimeFormatter.cpp | 31 ++++++++++++---- .../prestosql/tests/DateTimeFunctionsTest.cpp | 29 ++++++++++++--- velox/type/tz/TimeZoneMap.cpp | 37 +++++++++++++------ velox/type/tz/TimeZoneMap.h | 4 ++ 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 476ae23a8e39..7aa17faee1c4 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1167,10 +1167,17 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { size += std::max((int)token.pattern.minRepresentDigits, 9); break; case DateTimeFormatSpecifier::TIMEZONE: - VELOX_NYI( - "Date format specifier is not yet implemented: {} ({})", - getSpecifierName(token.pattern.specifier), - token.pattern.minRepresentDigits); + if (token.pattern.minRepresentDigits <= 3) { + // The longest abbreviation according to here is 5, e.g. some time + // zones use the offset as the abbreviation, like +0530. + // https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + size += 5; + } else { + VELOX_NYI( + "Date format specifier is not yet implemented: {} ({})", + getSpecifierName(token.pattern.specifier), + token.pattern.minRepresentDigits); + } break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: @@ -1415,9 +1422,19 @@ int32_t DateTimeFormatter::format( } break; case DateTimeFormatSpecifier::TIMEZONE: { - // TODO: implement short name time zone, need a map from full name - // to short name - VELOX_UNSUPPORTED("time zone name is not yet supported"); + VELOX_USER_CHECK_NOT_NULL( + timezone, + "The time zone cannot be formatted if it is not present."); + if (token.pattern.minRepresentDigits <= 3) { + const std::string& abbrev = timezone->getShortName( + std::chrono::seconds(timestamp.getSeconds()), + tz::TimeZone::TChoose::kEarliest); + std::memcpy(result, abbrev.data(), abbrev.length()); + result += abbrev.length(); + } else { + // TODO: implement full name time zone + VELOX_NYI("full time zone name is not yet supported"); + } } break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: { diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 97807df92a0e..661874dda3fb 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3269,6 +3269,24 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_EQ("+05:30", formatDatetime(parseTimestamp("1970-01-01"), "ZZ")); EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-01-01"), "Z")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zzz")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zz")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + + // Test daylight savings. + setQueryTimeZone("America/Los_Angeles"); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + EXPECT_EQ("PDT", formatDatetime(parseTimestamp("1970-10-01"), "z")); + + // Test a long abbreviation. + setQueryTimeZone("Asia/Colombo"); + EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-10-01"), "z")); + + setQueryTimeZone("Asia/Kolkata"); + // We don't support more than 3 'z's yet. + EXPECT_THROW( + formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + // Literal test cases. EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'")); EXPECT_EQ("'", formatDatetime(parseTimestamp("1970-01-01"), "''")); @@ -3313,15 +3331,14 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_THROW( formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError); - // System errors for patterns we haven't implemented yet. + // Time zone name patterns aren't supported when there isn't a time zone + // available. EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxRuntimeError); - EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxUserError); } TEST_F(DateTimeFunctionsTest, formatDateTimeTimezone) { diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 93dacadba677..2d828f4b130e 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -242,6 +242,20 @@ void validateRangeImpl(time_point timePoint) { } } +date::zoned_time getZonedTime( + const date::time_zone* tz, + date::local_seconds timestamp, + TimeZone::TChoose choose) { + if (choose == TimeZone::TChoose::kFail) { + // By default, throws. + return date::zoned_time{tz, timestamp}; + } + + auto dateChoose = (choose == TimeZone::TChoose::kEarliest) + ? date::choose::earliest + : date::choose::latest; + return date::zoned_time{tz, timestamp, dateChoose}; +} } // namespace void validateRange(time_point timePoint) { @@ -334,20 +348,10 @@ TimeZone::seconds TimeZone::to_sys( if (tz_ == nullptr) { // We can ignore `choose` as time offset conversions are always linear. - return (timePoint - offset_).time_since_epoch(); - } - - if (choose == TimeZone::TChoose::kFail) { - // By default, throws. - return date::zoned_time{tz_, timePoint}.get_sys_time().time_since_epoch(); + return (date::local_seconds(timestamp) - offset_).time_since_epoch(); } - auto dateChoose = (choose == TimeZone::TChoose::kEarliest) - ? date::choose::earliest - : date::choose::latest; - return date::zoned_time{tz_, timePoint, dateChoose} - .get_sys_time() - .time_since_epoch(); + return getZonedTime(tz_, timePoint, choose).get_sys_time().time_since_epoch(); } TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { @@ -361,4 +365,13 @@ TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch(); } +std::string TimeZone::getShortName( + TimeZone::seconds timestamp, + TimeZone::TChoose choose) const { + date::local_seconds timePoint{timestamp}; + validateRange(date::sys_seconds{timestamp}); + + return getZonedTime(tz_, timePoint, choose).get_info().abbrev; +} + } // namespace facebook::velox::tz diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 9554d7506328..f72e2e86fb65 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -151,6 +151,10 @@ class TimeZone { return tz_; } + std::string getShortName( + TimeZone::seconds timestamp, + TimeZone::TChoose choose = TChoose::kFail) const; + private: const date::time_zone* tz_{nullptr}; const std::chrono::minutes offset_{0};