From 12f46cfd69876fefee5abed8118855bb7c5b9c1d Mon Sep 17 00:00:00 2001 From: Kevin Wilfong Date: Mon, 28 Oct 2024 12:29:23 -0700 Subject: [PATCH] Add support for z, zz, zzz in format_datetime (#11323) Summary: This diff adds support for JODA's z, zz, zzz patterns (all equivalent) in Presto's forma_datetime function. This is used to format time zone abbreviations. Reviewed By: pedroerp Differential Revision: D64774281 --- velox/functions/lib/DateTimeFormatter.cpp | 31 ++++++++++---- .../prestosql/tests/DateTimeFunctionsTest.cpp | 33 ++++++++++++--- velox/type/tz/TimeZoneMap.cpp | 40 ++++++++++++++----- velox/type/tz/TimeZoneMap.h | 9 +++++ velox/type/tz/tests/TimeZoneMapTest.cpp | 25 ++++++++++++ 5 files changed, 114 insertions(+), 24 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 89215fe91704..ec4f973213ad 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1203,10 +1203,17 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { size += std::max((int)token.pattern.minRepresentDigits, 9); break; case DateTimeFormatSpecifier::TIMEZONE: - VELOX_NYI( - "Date format specifier is not yet implemented: {} ({})", - getSpecifierName(token.pattern.specifier), - token.pattern.minRepresentDigits); + if (token.pattern.minRepresentDigits <= 3) { + // The longest abbreviation according to here is 5, e.g. some time + // zones use the offset as the abbreviation, like +0530. + // https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + size += 5; + } else { + VELOX_NYI( + "Date format specifier is not yet implemented: {} ({})", + getSpecifierName(token.pattern.specifier), + token.pattern.minRepresentDigits); + } break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: @@ -1451,9 +1458,19 @@ int32_t DateTimeFormatter::format( } break; case DateTimeFormatSpecifier::TIMEZONE: { - // TODO: implement short name time zone, need a map from full name to - // short name - VELOX_UNSUPPORTED("time zone name is not yet supported"); + VELOX_USER_CHECK_NOT_NULL( + timezone, + "The time zone cannot be formatted if it is not present."); + if (token.pattern.minRepresentDigits <= 3) { + const std::string& abbrev = timezone->getShortName( + std::chrono::milliseconds(timestamp.toMillis()), + tz::TimeZone::TChoose::kEarliest); + std::memcpy(result, abbrev.data(), abbrev.length()); + result += abbrev.length(); + } else { + // TODO: implement full name time zone + VELOX_NYI("full time zone name is not yet supported"); + } } break; case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: { diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 97807df92a0e..afa4f5a8ac91 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3269,6 +3269,28 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_EQ("+05:30", formatDatetime(parseTimestamp("1970-01-01"), "ZZ")); EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-01-01"), "Z")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zzz")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zz")); + EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + + // Test daylight savings. + setQueryTimeZone("America/Los_Angeles"); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("1970-01-01"), "z")); + EXPECT_EQ("PDT", formatDatetime(parseTimestamp("1970-10-01"), "z")); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-03-10 01:00"), "z")); + EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-03-10 03:00"), "z")); + EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-11-03 01:00"), "z")); + EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-11-03 02:00"), "z")); + + // Test a long abbreviation. + setQueryTimeZone("Asia/Colombo"); + EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-10-01"), "z")); + + setQueryTimeZone("Asia/Kolkata"); + // We don't support more than 3 'z's yet. + EXPECT_THROW( + formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + // Literal test cases. EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'")); EXPECT_EQ("'", formatDatetime(parseTimestamp("1970-01-01"), "''")); @@ -3313,15 +3335,14 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { EXPECT_THROW( formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError); - // System errors for patterns we haven't implemented yet. + // Time zone name patterns aren't supported when there isn't a time zone + // available. EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxUserError); EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxRuntimeError); - EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError); + formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxUserError); } TEST_F(DateTimeFunctionsTest, formatDateTimeTimezone) { diff --git a/velox/type/tz/TimeZoneMap.cpp b/velox/type/tz/TimeZoneMap.cpp index 93dacadba677..2b7e8968ed04 100644 --- a/velox/type/tz/TimeZoneMap.cpp +++ b/velox/type/tz/TimeZoneMap.cpp @@ -242,6 +242,21 @@ void validateRangeImpl(time_point timePoint) { } } +template +date::zoned_time getZonedTime( + const date::time_zone* tz, + date::local_time timestamp, + TimeZone::TChoose choose) { + if (choose == TimeZone::TChoose::kFail) { + // By default, throws. + return date::zoned_time{tz, timestamp}; + } + + auto dateChoose = (choose == TimeZone::TChoose::kEarliest) + ? date::choose::earliest + : date::choose::latest; + return date::zoned_time{tz, timestamp, dateChoose}; +} } // namespace void validateRange(time_point timePoint) { @@ -337,17 +352,7 @@ TimeZone::seconds TimeZone::to_sys( return (timePoint - offset_).time_since_epoch(); } - if (choose == TimeZone::TChoose::kFail) { - // By default, throws. - return date::zoned_time{tz_, timePoint}.get_sys_time().time_since_epoch(); - } - - auto dateChoose = (choose == TimeZone::TChoose::kEarliest) - ? date::choose::earliest - : date::choose::latest; - return date::zoned_time{tz_, timePoint, dateChoose} - .get_sys_time() - .time_since_epoch(); + return getZonedTime(tz_, timePoint, choose).get_sys_time().time_since_epoch(); } TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { @@ -361,4 +366,17 @@ TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const { return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch(); } +std::string TimeZone::getShortName( + TimeZone::milliseconds timestamp, + TimeZone::TChoose choose) const { + date::local_time timePoint{timestamp}; + validateRange(date::sys_time(timestamp)); + + // Time zone offsets only have one name (no abbreviations). + if (tz_ == nullptr) { + return timeZoneName_; + } + + return getZonedTime(tz_, timePoint, choose).get_info().abbrev; +} } // namespace facebook::velox::tz diff --git a/velox/type/tz/TimeZoneMap.h b/velox/type/tz/TimeZoneMap.h index 9554d7506328..c7e5c0f6087e 100644 --- a/velox/type/tz/TimeZoneMap.h +++ b/velox/type/tz/TimeZoneMap.h @@ -113,6 +113,7 @@ class TimeZone { } using seconds = std::chrono::seconds; + using milliseconds = std::chrono::milliseconds; /// Converts a local time (the time as perceived in the user time zone /// represented by this object) to a system time (the corresponding time in @@ -151,6 +152,14 @@ class TimeZone { return tz_; } + /// Returns the short name (abbreviation) of the time zone for the given + /// timestamp. Note that the timestamp is needed for time zones that support + /// daylight savings time as the short name will change depending on the date + /// (e.g. PST/PDT). + std::string getShortName( + milliseconds timestamp, + TChoose choose = TChoose::kFail) const; + private: const date::time_zone* tz_{nullptr}; const std::chrono::minutes offset_{0}; diff --git a/velox/type/tz/tests/TimeZoneMapTest.cpp b/velox/type/tz/tests/TimeZoneMapTest.cpp index c56d6ef9c3ac..0d0fa21cb43c 100644 --- a/velox/type/tz/tests/TimeZoneMapTest.cpp +++ b/velox/type/tz/tests/TimeZoneMapTest.cpp @@ -234,5 +234,30 @@ TEST(TimeZoneMapTest, invalid) { VELOX_ASSERT_THROW(getTimeZoneID("etc/GMT+300"), "Unknown time zone"); } +TEST(TimeZoneMapTest, getShortName) { + auto toShortName = [&](std::string_view name, size_t ts) { + const auto* tz = locateZone(name); + EXPECT_NE(tz, nullptr); + return tz->getShortName(milliseconds{ts}); + }; + + // Test an offset that maps to an actual time zone. + EXPECT_EQ("UTC", toShortName("+00:00", 0)); + + // Test offsets that do not map to named time zones. + EXPECT_EQ("+00:01", toShortName("+00:01", 0)); + EXPECT_EQ("-00:01", toShortName("-00:01", 0)); + EXPECT_EQ("+01:00", toShortName("+01:00", 0)); + EXPECT_EQ("-01:01", toShortName("-01:01", 0)); + + // In "2024-07-25", America/Los_Angeles was in daylight savings time (UTC-07). + size_t ts = 1721890800000; + EXPECT_EQ("PDT", toShortName("America/Los_Angeles", ts)); + + // In "2024-01-01", it was not (UTC-08). + ts = 1704096000000; + EXPECT_EQ("PST", toShortName("America/Los_Angeles", ts)); +} + } // namespace } // namespace facebook::velox::tz