Skip to content

Commit

Permalink
Add support for z, zz, zzz in format_datetime (facebookincubator#11323)
Browse files Browse the repository at this point in the history
Summary:

This diff adds support for JODA's z, zz, zzz patterns (all equivalent) in Presto's
forma_datetime function.

This is used to format time zone abbreviations.

Reviewed By: pedroerp

Differential Revision: D64774281
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Oct 28, 2024
1 parent eccd85e commit d0b8ff5
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 24 deletions.
31 changes: 24 additions & 7 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1203,10 +1203,17 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const {
size += std::max((int)token.pattern.minRepresentDigits, 9);
break;
case DateTimeFormatSpecifier::TIMEZONE:
VELOX_NYI(
"Date format specifier is not yet implemented: {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);
if (token.pattern.minRepresentDigits <= 3) {
// The longest abbreviation according to here is 5, e.g. some time
// zones use the offset as the abbreviation, like +0530.
// https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
size += 5;
} else {
VELOX_NYI(
"Date format specifier is not yet implemented: {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);
}

break;
case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID:
Expand Down Expand Up @@ -1451,9 +1458,19 @@ int32_t DateTimeFormatter::format(
} break;

case DateTimeFormatSpecifier::TIMEZONE: {
// TODO: implement short name time zone, need a map from full name to
// short name
VELOX_UNSUPPORTED("time zone name is not yet supported");
VELOX_USER_CHECK_NOT_NULL(
timezone,
"The time zone cannot be formatted if it is not present.");
if (token.pattern.minRepresentDigits <= 3) {
const std::string& abbrev = timezone->getShortName(
std::chrono::milliseconds(timestamp.toMillis()),
tz::TimeZone::TChoose::kEarliest);
std::memcpy(result, abbrev.data(), abbrev.length());
result += abbrev.length();
} else {
// TODO: implement full name time zone
VELOX_NYI("full time zone name is not yet supported");
}
} break;

case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: {
Expand Down
33 changes: 27 additions & 6 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3269,6 +3269,28 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
EXPECT_EQ("+05:30", formatDatetime(parseTimestamp("1970-01-01"), "ZZ"));
EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-01-01"), "Z"));

EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zzz"));
EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zz"));
EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "z"));

// Test daylight savings.
setQueryTimeZone("America/Los_Angeles");
EXPECT_EQ("PST", formatDatetime(parseTimestamp("1970-01-01"), "z"));
EXPECT_EQ("PDT", formatDatetime(parseTimestamp("1970-10-01"), "z"));
EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-03-10 01:00"), "z"));
EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-03-10 03:00"), "z"));
EXPECT_EQ("PDT", formatDatetime(parseTimestamp("2024-11-03 01:00"), "z"));
EXPECT_EQ("PST", formatDatetime(parseTimestamp("2024-11-03 02:00"), "z"));

// Test a long abbreviation.
setQueryTimeZone("Asia/Colombo");
EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-10-01"), "z"));

setQueryTimeZone("Asia/Kolkata");
// We don't support more than 3 'z's yet.
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError);

// Literal test cases.
EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'"));
EXPECT_EQ("'", formatDatetime(parseTimestamp("1970-01-01"), "''"));
Expand Down Expand Up @@ -3313,15 +3335,14 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError);

// System errors for patterns we haven't implemented yet.
// Time zone name patterns aren't supported when there isn't a time zone
// available.
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxRuntimeError);
formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxUserError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxRuntimeError);
formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxUserError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxRuntimeError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError);
formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxUserError);
}

TEST_F(DateTimeFunctionsTest, formatDateTimeTimezone) {
Expand Down
40 changes: 29 additions & 11 deletions velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,21 @@ void validateRangeImpl(time_point<TDuration> timePoint) {
}
}

template <typename TDuration>
date::zoned_time<TDuration> getZonedTime(
const date::time_zone* tz,
date::local_time<TDuration> timestamp,
TimeZone::TChoose choose) {
if (choose == TimeZone::TChoose::kFail) {
// By default, throws.
return date::zoned_time{tz, timestamp};
}

auto dateChoose = (choose == TimeZone::TChoose::kEarliest)
? date::choose::earliest
: date::choose::latest;
return date::zoned_time{tz, timestamp, dateChoose};
}
} // namespace

void validateRange(time_point<std::chrono::seconds> timePoint) {
Expand Down Expand Up @@ -337,17 +352,7 @@ TimeZone::seconds TimeZone::to_sys(
return (timePoint - offset_).time_since_epoch();
}

if (choose == TimeZone::TChoose::kFail) {
// By default, throws.
return date::zoned_time{tz_, timePoint}.get_sys_time().time_since_epoch();
}

auto dateChoose = (choose == TimeZone::TChoose::kEarliest)
? date::choose::earliest
: date::choose::latest;
return date::zoned_time{tz_, timePoint, dateChoose}
.get_sys_time()
.time_since_epoch();
return getZonedTime(tz_, timePoint, choose).get_sys_time().time_since_epoch();
}

TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const {
Expand All @@ -361,4 +366,17 @@ TimeZone::seconds TimeZone::to_local(TimeZone::seconds timestamp) const {
return date::zoned_time{tz_, timePoint}.get_local_time().time_since_epoch();
}

std::string TimeZone::getShortName(
TimeZone::milliseconds timestamp,
TimeZone::TChoose choose) const {
date::local_time<milliseconds> timePoint{timestamp};
validateRange(date::sys_time<milliseconds>(timestamp));

// Time zone offsets only have one name (no abbreviations).
if (tz_ == nullptr) {
return timeZoneName_;
}

return getZonedTime(tz_, timePoint, choose).get_info().abbrev;
}
} // namespace facebook::velox::tz
9 changes: 9 additions & 0 deletions velox/type/tz/TimeZoneMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class TimeZone {
}

using seconds = std::chrono::seconds;
using milliseconds = std::chrono::milliseconds;

/// Converts a local time (the time as perceived in the user time zone
/// represented by this object) to a system time (the corresponding time in
Expand Down Expand Up @@ -151,6 +152,14 @@ class TimeZone {
return tz_;
}

/// Returns the short name (abbreviation) of the time zone for the given
/// timestamp. Note that the timestamp is needed for time zones that support
/// daylight savings time as the short name will change depending on the date
/// (e.g. PST/PDT).
std::string getShortName(
milliseconds timestamp,
TChoose choose = TChoose::kFail) const;

private:
const date::time_zone* tz_{nullptr};
const std::chrono::minutes offset_{0};
Expand Down
25 changes: 25 additions & 0 deletions velox/type/tz/tests/TimeZoneMapTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,5 +234,30 @@ TEST(TimeZoneMapTest, invalid) {
VELOX_ASSERT_THROW(getTimeZoneID("etc/GMT+300"), "Unknown time zone");
}

TEST(TimeZoneMapTest, getShortName) {
auto toShortName = [&](std::string_view name, size_t ts) {
const auto* tz = locateZone(name);
EXPECT_NE(tz, nullptr);
return tz->getShortName(milliseconds{ts});
};

// Test an offset that maps to an actual time zone.
EXPECT_EQ("UTC", toShortName("+00:00", 0));

// Test offsets that do not map to named time zones.
EXPECT_EQ("+00:01", toShortName("+00:01", 0));
EXPECT_EQ("-00:01", toShortName("-00:01", 0));
EXPECT_EQ("+01:00", toShortName("+01:00", 0));
EXPECT_EQ("-01:01", toShortName("-01:01", 0));

// In "2024-07-25", America/Los_Angeles was in daylight savings time (UTC-07).
size_t ts = 1721890800000;
EXPECT_EQ("PDT", toShortName("America/Los_Angeles", ts));

// In "2024-01-01", it was not (UTC-08).
ts = 1704096000000;
EXPECT_EQ("PST", toShortName("America/Los_Angeles", ts));
}

} // namespace
} // namespace facebook::velox::tz

0 comments on commit d0b8ff5

Please sign in to comment.