Skip to content

Commit

Permalink
Fix Presto's format_datetime function with time zone (facebookincubat…
Browse files Browse the repository at this point in the history
…or#11283)

Summary:

The format_datetime currently outptus the time zone id if for 3 or fewer 'z' characters
in the format string.  However, the JODA library, which this is based on, does this for
3 or more 'Z' characters.

This diff fixes this, as well as adds support for a single 'Z' (which outputs the same
thing as 'ZZ' just without the colon).  So 'Z' is fully supported for any number of
characters.

To be more explicit:

https://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html

From the JODA docs:
`'Z' outputs offset without a colon (-0800), 'ZZ' outputs the offset with a 
colon(-08:00), 'ZZZ' or more outputs the zone id(America/Los_Angeles).`

And not clearly explained in the docs, but from experimentation:
`'z', 'zz', or 'zzz' outputs the abbreviation of the time zone(PDT), and 'zzzz' or more 
outputs the time zone name(Pacific Daylight Time)`

Currently DateTimeFormatter treats 'zzzz' or more like JODA treats 'ZZZ' or more.
This diff marks 'zzzz' or more as unsupported (we can implement that in a future
change), and moves that logic under 'ZZZ' or more to be consistent.

It also implements 'Z' (previously only 'ZZ' was implemented in DateTimeFormatter).

Reviewed By: bikramSingh91

Differential Revision: D64500193
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Oct 28, 2024
1 parent 624a21c commit 66e5679
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 41 deletions.
63 changes: 32 additions & 31 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ std::string formatFractionOfSecond(
return toAdd;
}

int32_t appendTimezoneOffset(int64_t offset, char* result) {
int32_t appendTimezoneOffset(int64_t offset, char* result, bool includeColon) {
int pos = 0;
if (offset >= 0) {
result[pos++] = '+';
Expand All @@ -536,7 +536,9 @@ int32_t appendTimezoneOffset(int64_t offset, char* result) {
result[pos++] = char(hours % 10 + '0');
}

result[pos++] = ':';
if (includeColon) {
result[pos++] = ':';
}

const auto minutes = (offset / 60) % 60;
if LIKELY (minutes == 0) {
Expand Down Expand Up @@ -1068,20 +1070,26 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const {
size += std::max((int)token.pattern.minRepresentDigits, 9);
break;
case DateTimeFormatSpecifier::TIMEZONE:
if (timezone == nullptr) {
VELOX_USER_FAIL("Timezone unknown");
}
size += std::max(
token.pattern.minRepresentDigits, timezone->name().length());
VELOX_NYI(
"Date format specifier is not yet implemented: {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);

break;
case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID:
if (token.pattern.minRepresentDigits != 2) {
VELOX_UNSUPPORTED(
"Date format specifier is not supported: {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);
if (token.pattern.minRepresentDigits == 1) {
// 'Z' means output the time zone offset without a colon.
size += 8;
} else if (token.pattern.minRepresentDigits == 2) {
// 'ZZ' means output the time zone offset with a colon.
size += 9;
} else {
// 'ZZZ' (or more) means otuput the time zone ID.
if (timezone == nullptr) {
VELOX_USER_FAIL("Timezone unknown");
}
size += timezone->name().length();
}
size += 9;
break;
// Not supported.
case DateTimeFormatSpecifier::WEEK_YEAR:
Expand Down Expand Up @@ -1312,35 +1320,28 @@ int32_t DateTimeFormatter::format(
case DateTimeFormatSpecifier::TIMEZONE: {
// TODO: implement short name time zone, need a map from full name to
// short name
if (token.pattern.minRepresentDigits <= 3) {
VELOX_UNSUPPORTED("short name time zone is not yet supported");
}
if (timezone == nullptr) {
VELOX_USER_FAIL("Timezone unknown");
}
const auto& piece = timezone->name();
std::memcpy(result, piece.data(), piece.length());
result += piece.length();
VELOX_UNSUPPORTED("time zone name is not yet supported");
} break;

case DateTimeFormatSpecifier::TIMEZONE_OFFSET_ID: {
// Zone: 'Z' outputs offset without a colon, 'ZZ' outputs the offset
// with a colon, 'ZZZ' or more outputs the zone id.
// TODO Add support for 'Z' and 'ZZZ'.
if (token.pattern.minRepresentDigits != 2) {
VELOX_UNSUPPORTED(
"format is not supported for specifier {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);
}

if (offset == 0 && zeroOffsetText.has_value()) {
std::memcpy(result, zeroOffsetText->data(), zeroOffsetText->size());
result += zeroOffsetText->size();
break;
}

result += appendTimezoneOffset(offset, result);
if (token.pattern.minRepresentDigits >= 3) {
// Append the time zone ID.
const auto& piece = timezone->name();
std::memcpy(result, piece.data(), piece.length());
result += piece.length();
break;
}

result += appendTimezoneOffset(
offset, result, token.pattern.minRepresentDigits == 2);
break;
}
case DateTimeFormatSpecifier::WEEK_OF_WEEK_YEAR: {
Expand Down
29 changes: 20 additions & 9 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3214,11 +3214,18 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
"0010",
formatDatetime(parseTimestamp("2022-01-01 03:30:30.001"), "SSSS"));

// Time zone test cases - 'z'
// Time zone test cases - 'Z'
setQueryTimeZone("Asia/Kolkata");
EXPECT_EQ(
"Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "zzzz"));
"Asia/Kolkata",
formatDatetime(
parseTimestamp("1970-01-01"), "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ"));
EXPECT_EQ(
"Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "ZZZZ"));
EXPECT_EQ(
"Asia/Kolkata", formatDatetime(parseTimestamp("1970-01-01"), "ZZZ"));
EXPECT_EQ("+05:30", formatDatetime(parseTimestamp("1970-01-01"), "ZZ"));
EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-01-01"), "Z"));

// Literal test cases.
EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'"));
Expand All @@ -3243,12 +3250,12 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
"AD 19 1970 4 Thu 1970 1 1 1 AM 8 8 8 8 3 11 5 Asia/Kolkata",
formatDatetime(
parseTimestamp("1970-01-01 02:33:11.5"),
"G C Y e E y D M d a K h H k m s S zzzz"));
"G C Y e E y D M d a K h H k m s S ZZZ"));
EXPECT_EQ(
"AD 19 1970 4 asdfghjklzxcvbnmqwertyuiop Thu ' 1970 1 1 1 AM 8 8 8 8 3 11 5 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ Asia/Kolkata",
formatDatetime(
parseTimestamp("1970-01-01 02:33:11.5"),
"G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ zzzz"));
"G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ ZZZ"));

disableAdjustTimestampToTimezone();
EXPECT_EQ(
Expand All @@ -3260,15 +3267,19 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "x"), VeloxUserError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxUserError);
formatDatetime(parseTimestamp("1970-01-01"), "q"), VeloxUserError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxUserError);
formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError);

// System errors for patterns we haven't implemented yet.
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxUserError);
formatDatetime(parseTimestamp("1970-01-01"), "z"), VeloxRuntimeError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "q"), VeloxUserError);
formatDatetime(parseTimestamp("1970-01-01"), "zz"), VeloxRuntimeError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "'abcd"), VeloxUserError);
formatDatetime(parseTimestamp("1970-01-01"), "zzz"), VeloxRuntimeError);
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError);
}

TEST_F(DateTimeFunctionsTest, formatDateTimeTimezone) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ void castToString(
const auto* timestamps = input.as<SimpleVector<int64_t>>();

auto expectedFormatter =
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS zzzz");
functions::buildJodaDateTimeFormatter("yyyy-MM-dd HH:mm:ss.SSS ZZZ");
VELOX_CHECK(
!expectedFormatter.hasError(),
"Default format should always be valid, error: {}",
Expand Down

0 comments on commit 66e5679

Please sign in to comment.