Skip to content

Commit

Permalink
Add support for zzzz (and beyond) in format_datetime (#11330)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11330

This diff adds support for JODA's zzzz (or more) patterns (all equivalent) in Presto's
format_datetime function.

This is used to format long time zone names.

Long time zone names are not available from the IANA time zone database, so we
can't use the tz library to generate these.  Fortunately, unicode provides some
utilities to generate these.

Differential Revision: D64795407
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Oct 24, 2024
1 parent c7820f4 commit 81b1219
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 12 deletions.
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,18 @@ add_compile_definitions(FOLLY_HAVE_INT128_T=1)
set_source(folly)
resolve_dependency(folly)

# On macOS, append the install prefix of the icu4c package to CMAKE_PREFIX_PATH
# before ICU is resolved below.
# NOTE(review): the two prefixes look like the Homebrew locations for Apple
# Silicon (/opt/homebrew) and Intel (/usr/local) machines - confirm.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
if(ON_APPLE_M1)
list(APPEND CMAKE_PREFIX_PATH "/opt/homebrew/opt/icu4c")
else()
list(APPEND CMAKE_PREFIX_PATH "/usr/local/opt/icu4c")
endif()
endif()

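# ICU is needed by Boost when it is built from source, and by Velox itself
# (velox_type_tz and the date time formatter link ICU::i18n and ICU::uc to
# produce long time zone names).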
set_source(ICU)
resolve_dependency(ICU COMPONENTS i18n uc)

if(${VELOX_BUILD_TESTING})
# Spark query runner depends on absl, gRPC.
set_source(absl)
Expand Down
3 changes: 2 additions & 1 deletion velox/functions/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ velox_link_libraries(velox_functions_util velox_vector velox_common_base)
velox_add_library(velox_functions_lib_date_time_formatter DateTimeFormatter.cpp
DateTimeFormatterBuilder.cpp)

velox_link_libraries(velox_functions_lib_date_time_formatter velox_type_tz)
velox_link_libraries(velox_functions_lib_date_time_formatter velox_type_tz
ICU::i18n ICU::uc)

velox_add_library(
velox_functions_lib
Expand Down
14 changes: 8 additions & 6 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1173,10 +1173,10 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const {
// https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
size += 5;
} else {
VELOX_NYI(
"Date format specifier is not yet implemented: {} ({})",
getSpecifierName(token.pattern.specifier),
token.pattern.minRepresentDigits);
// The longest time zone long name is 40 characters: "Australian Central
// Western Standard Time". Reserve 50 to leave some headroom.
// https://www.timeanddate.com/time/zones/
size += 50;
}

break;
Expand Down Expand Up @@ -1431,8 +1431,10 @@ int32_t DateTimeFormatter::format(
std::memcpy(result, abbrev.data(), abbrev.length());
result += abbrev.length();
} else {
// TODO: implement full name time zone
VELOX_NYI("full time zone name is not yet supported");
std::string longName = timezone->getLongName(
std::chrono::seconds(timestamp.getSeconds()));
std::memcpy(result, longName.data(), longName.length());
result += longName.length();
}
} break;

Expand Down
26 changes: 22 additions & 4 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3272,21 +3272,39 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zzz"));
EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "zz"));
EXPECT_EQ("IST", formatDatetime(parseTimestamp("1970-01-01"), "z"));
EXPECT_EQ(
"India Standard Time",
formatDatetime(parseTimestamp("1970-01-01"), "zzzz"));
EXPECT_EQ(
"India Standard Time",
formatDatetime(parseTimestamp("1970-01-01"), "zzzzzzzzzzzzzzzzzzzzzz"));

// Test daylight savings.
setQueryTimeZone("America/Los_Angeles");
EXPECT_EQ("PST", formatDatetime(parseTimestamp("1970-01-01"), "z"));
EXPECT_EQ("PDT", formatDatetime(parseTimestamp("1970-10-01"), "z"));
EXPECT_EQ(
"Pacific Standard Time",
formatDatetime(parseTimestamp("1970-01-01"), "zzzz"));
EXPECT_EQ(
"Pacific Daylight Time",
formatDatetime(parseTimestamp("1970-10-01"), "zzzz"));

// Test a long abbreviation.
setQueryTimeZone("Asia/Colombo");
EXPECT_EQ("+0530", formatDatetime(parseTimestamp("1970-10-01"), "z"));
EXPECT_EQ(
"India Standard Time",
formatDatetime(parseTimestamp("1970-10-01"), "zzzz"));

setQueryTimeZone("Asia/Kolkata");
// We don't support more than 3 'z's yet.
EXPECT_THROW(
formatDatetime(parseTimestamp("1970-01-01"), "zzzz"), VeloxRuntimeError);
// Test a long long name.
setQueryTimeZone("Australia/Eucla");
EXPECT_EQ("+0845", formatDatetime(parseTimestamp("1970-10-01"), "z"));
EXPECT_EQ(
"Australian Central Western Standard Time",
formatDatetime(parseTimestamp("1970-10-01"), "zzzz"));

setQueryTimeZone("Asia/Kolkata");
// Literal test cases.
EXPECT_EQ("hello", formatDatetime(parseTimestamp("1970-01-01"), "'hello'"));
EXPECT_EQ("'", formatDatetime(parseTimestamp("1970-01-01"), "''"));
Expand Down
4 changes: 3 additions & 1 deletion velox/type/tz/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ velox_link_libraries(
velox_external_date
Boost::regex
fmt::fmt
Folly::folly)
Folly::folly
ICU::i18n
ICU::uc)
32 changes: 32 additions & 0 deletions velox/type/tz/TimeZoneMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
#include <fmt/core.h>
#include <folly/container/F14Map.h>
#include <folly/container/F14Set.h>
#include <unicode/locid.h>
#include <unicode/timezone.h>
#include <unicode/tzfmt.h>
#include <unicode/unistr.h>
#include "velox/common/base/Exceptions.h"
#include "velox/common/testutil/TestValue.h"
#include "velox/external/date/tz.h"
Expand Down Expand Up @@ -365,4 +369,32 @@ std::string TimeZone::getShortName(TimeZone::seconds timestamp) const {
return tz_->get_info(date::local_seconds(timestamp)).first.abbrev;
}

/// Returns the long English (en_US) display name of this time zone at the
/// given point in time, e.g. "Pacific Standard Time" or "Pacific Daylight
/// Time" for America/Los_Angeles.
///
/// Long names are generated with ICU's TimeZoneFormat using the
/// UTZFMT_STYLE_SPECIFIC_LONG style.
///
/// @param timestamp Point in time, in seconds, used to select the name
/// variant (e.g. standard vs. daylight) that applies. It is scaled to
/// milliseconds before being handed to ICU.
/// @return The long time zone name encoded as UTF-8.
///
/// Fails a VELOX_USER_CHECK if the ICU formatter or the ICU time zone object
/// could not be created.
std::string TimeZone::getLongName(TimeZone::seconds timestamp) const {
  static const icu::Locale locale("en", "US");

  // The formatter is created once and reused by every call. The ICU status
  // code lives inside the initializer so that it is scoped to the single
  // createInstance() call it reports on, and a failure status is turned into
  // a null formatter instead of being silently ignored.
  static const std::unique_ptr<icu::TimeZoneFormat> format = [] {
    UErrorCode status = U_ZERO_ERROR;
    std::unique_ptr<icu::TimeZoneFormat> instance(
        icu::TimeZoneFormat::createInstance(locale, status));
    if (U_FAILURE(status)) {
      instance.reset();
    }
    return instance;
  }();
  VELOX_USER_CHECK_NOT_NULL(format);

  // Get the ICU TimeZone by name. The name is converted from UTF-8 explicitly
  // rather than through ICU's default codepage conversion, and the length is
  // narrowed to ICU's int32_t explicitly.
  // NOTE(review): ICU documents createTimeZone() as returning the
  // "Etc/Unknown" zone rather than null for IDs it does not recognize, so the
  // null check below may not catch unknown names - confirm whether that case
  // needs separate handling.
  const std::unique_ptr<icu::TimeZone> tz(icu::TimeZone::createTimeZone(
      icu::UnicodeString::fromUTF8(icu::StringPiece(
          timeZoneName_.data(),
          static_cast<int32_t>(timeZoneName_.length())))));
  VELOX_USER_CHECK_NOT_NULL(tz);

  // Format the time zone to get the long name. ICU takes the date as a UDate
  // (a double holding milliseconds), hence the conversion from seconds.
  icu::UnicodeString longName;
  format->format(
      UTimeZoneFormatStyle::UTZFMT_STYLE_SPECIFIC_LONG,
      *tz,
      static_cast<UDate>(timestamp.count()) * 1000.0,
      longName);

  // Convert the UnicodeString back to a UTF-8 std::string.
  std::string longNameStr;
  longName.toUTF8String(longNameStr);

  return longNameStr;
}

} // namespace facebook::velox::tz
2 changes: 2 additions & 0 deletions velox/type/tz/TimeZoneMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ class TimeZone {

std::string getShortName(TimeZone::seconds timestamp) const;

/// Returns the long English name of this time zone that applies at
/// 'timestamp', e.g. "Pacific Standard Time" or "Pacific Daylight Time"
/// for America/Los_Angeles.
std::string getLongName(TimeZone::seconds timestamp) const;

private:
const date::time_zone* tz_{nullptr};
const std::chrono::minutes offset_{0};
Expand Down

0 comments on commit 81b1219

Please sign in to comment.