Skip to content

Commit

Permalink
Add support for "week of month" in SimpleDateTimeFormatter (#11103)
Browse files Browse the repository at this point in the history
Summary:
`java.text.SimpleDateFormat` supports using 'week of month' to parse/format
dates. The specifier for 'week of month' is 'W'. Currently, DateTimeFormatter
supports 3 groups of fields specifying the day within the year. They are the
following combinations:

```
year + week + dayOfWeek
year + dayOfYear
year + month + day
```
This PR introduces a new combination that is
`year + month + weekOfMonth + dayOfWeek` and adds support for "week of month"
in SimpleDateTimeFormatter.

Related issue: #10354

Pull Request resolved: #11103

Reviewed By: Yuhta, amitkdutta

Differential Revision: D64920551

Pulled By: pedroerp

fbshipit-source-id: 3db9b6d33783aac0ee41791aeac96142e63fb22a
  • Loading branch information
NEUpanning authored and facebook-github-bot committed Oct 26, 2024
1 parent 4049362 commit a1d923e
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 7 deletions.
66 changes: 60 additions & 6 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,14 @@ struct Date {
int32_t dayOfYear = 1;
bool dayOfYearFormat = false;

int32_t weekOfMonth = 1;
bool weekOfMonthDateFormat = false;

bool centuryFormat = false;

bool isYearOfEra = false; // Year of era cannot be zero or negative.
bool hasYear = false; // Whether year was explicitly specified.
bool hasDayOfWeek = false; // Whether dayOfWeek was explicitly specified.

int32_t hour = 0;
int32_t minute = 0;
Expand Down Expand Up @@ -456,6 +460,7 @@ int64_t parseDayOfWeekText(const char* cur, const char* end, Date& date) {
auto it = dayOfWeekMap.find(std::string_view(cur, 3));
if (it != dayOfWeekMap.end()) {
date.dayOfWeek = it->second.second;
date.hasDayOfWeek = true;
if (end - cur >= it->second.first.size() + 3) {
if (std::strncmp(
cur + 3, it->second.first.data(), it->second.first.size()) ==
Expand Down Expand Up @@ -612,6 +617,8 @@ std::string_view getSpecifierName(DateTimeFormatSpecifier specifier) {
return "TIMEZONE_OFFSET_ID";
case DateTimeFormatSpecifier::LITERAL_PERCENT:
return "LITERAL_PERCENT";
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
return "WEEK_OF_MONTH";
default: {
VELOX_UNREACHABLE("[Unexpected date format specifier]");
return ""; // Make compiler happy.
Expand All @@ -628,6 +635,7 @@ int getMaxDigitConsume(
case DateTimeFormatSpecifier::CENTURY_OF_ERA:
case DateTimeFormatSpecifier::DAY_OF_WEEK_1_BASED:
case DateTimeFormatSpecifier::FRACTION_OF_SECOND:
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
return curPattern.minRepresentDigits;

case DateTimeFormatSpecifier::YEAR_OF_ERA:
Expand Down Expand Up @@ -720,7 +728,9 @@ int32_t parseFromPattern(
return -1;
}
cur += size;
date.weekDateFormat = true;
if (!date.weekOfMonthDateFormat) {
date.weekDateFormat = true;
}
date.dayOfYearFormat = false;
if (!date.hasYear) {
date.hasYear = true;
Expand Down Expand Up @@ -838,8 +848,10 @@ int32_t parseFromPattern(
break;

case DateTimeFormatSpecifier::MONTH_OF_YEAR:
if (number < 1 || number > 12) {
return -1;
if (type != DateTimeFormatterType::LENIENT_SIMPLE) {
if (number < 1 || number > 12) {
return -1;
}
}
date.month = number;
date.weekDateFormat = false;
Expand All @@ -858,6 +870,7 @@ int32_t parseFromPattern(
date.day = number;
date.weekDateFormat = false;
date.dayOfYearFormat = false;
date.weekOfMonthDateFormat = false;
// Joda has this weird behavior where it returns 1970 as the year by
// default (if no year is specified), but if either day or month are
// specified, it fallsback to 2000.
Expand All @@ -872,6 +885,7 @@ int32_t parseFromPattern(
date.dayOfYear = number;
date.dayOfYearFormat = true;
date.weekDateFormat = false;
date.weekOfMonthDateFormat = false;
// Joda has this weird behavior where it returns 1970 as the year by
// default (if no year is specified), but if either day or month are
// specified, it fallsback to 2000.
Expand Down Expand Up @@ -944,6 +958,7 @@ int32_t parseFromPattern(
date.weekDateFormat = true;
date.dayOfYearFormat = false;
date.centuryFormat = false;
date.weekOfMonthDateFormat = false;
date.hasYear = true;
break;

Expand All @@ -954,24 +969,40 @@ int32_t parseFromPattern(
date.week = number;
date.weekDateFormat = true;
date.dayOfYearFormat = false;
date.weekOfMonthDateFormat = false;
if (!date.hasYear) {
date.hasYear = true;
date.year = 2000;
}
break;

case DateTimeFormatSpecifier::DAY_OF_WEEK_1_BASED:
if (number < 1 || number > 7) {
return -1;
if (type != DateTimeFormatterType::LENIENT_SIMPLE) {
if (number < 1 || number > 7) {
return -1;
}
}
date.dayOfWeek = number;
date.weekDateFormat = true;
date.hasDayOfWeek = true;
if (!date.weekOfMonthDateFormat) {
date.weekDateFormat = true;
}
date.dayOfYearFormat = false;
if (!date.hasYear) {
date.hasYear = true;
date.year = 2000;
}
break;
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
date.weekOfMonthDateFormat = true;
date.weekOfMonth = number;
date.weekDateFormat = false;
date.hasYear = true;
// For week of month date format, the default value of dayOfWeek is 7.
if (!date.hasDayOfWeek) {
date.dayOfWeek = 7;
}
break;

default:
VELOX_NYI(
Expand Down Expand Up @@ -1003,6 +1034,7 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const {
break;
case DateTimeFormatSpecifier::DAY_OF_WEEK_0_BASED:
case DateTimeFormatSpecifier::DAY_OF_WEEK_1_BASED:
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
size += std::max((int)token.pattern.minRepresentDigits, 1);
break;
case DateTimeFormatSpecifier::DAY_OF_WEEK_TEXT:
Expand Down Expand Up @@ -1321,6 +1353,18 @@ int32_t DateTimeFormatter::format(
result);
break;
}
case DateTimeFormatSpecifier::WEEK_OF_MONTH: {
result += padContent(
ceil(
(7 + static_cast<unsigned>(calDate.day()) -
weekday.c_encoding() - 1) /
7.0),
'0',
token.pattern.minRepresentDigits,
maxResultEnd,
result);
break;
}
case DateTimeFormatSpecifier::WEEK_YEAR:
default:
VELOX_UNSUPPORTED(
Expand Down Expand Up @@ -1423,6 +1467,13 @@ Expected<DateTimeResult> DateTimeFormatter::parse(
} else if (date.dayOfYearFormat) {
daysSinceEpoch =
util::daysSinceEpochFromDayOfYear(date.year, date.dayOfYear);
} else if (date.weekOfMonthDateFormat) {
daysSinceEpoch = util::daysSinceEpochFromWeekOfMonthDate(
date.year,
date.month,
date.weekOfMonth,
date.dayOfWeek,
this->type_ == DateTimeFormatterType::LENIENT_SIMPLE);
} else {
daysSinceEpoch =
util::daysSinceEpochFromDate(date.year, date.month, date.day);
Expand Down Expand Up @@ -1815,6 +1866,9 @@ Expected<std::shared_ptr<DateTimeFormatter>> buildSimpleDateTimeFormatter(
case 'w':
builder.appendWeekOfWeekYear(count);
break;
case 'W':
builder.appendWeekOfMonth(count);
break;
case 'x':
builder.appendWeekYear(count);
break;
Expand Down
5 changes: 4 additions & 1 deletion velox/functions/lib/DateTimeFormatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ enum class DateTimeFormatSpecifier : uint8_t {
TIMEZONE_OFFSET_ID = 22,

// A literal % character
LITERAL_PERCENT = 23
LITERAL_PERCENT = 23,

// Week of month based on java.text.SimpleDateFormat, e.g: 2
WEEK_OF_MONTH = 24
};

struct FormatPattern {
Expand Down
7 changes: 7 additions & 0 deletions velox/functions/lib/DateTimeFormatterBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfWeekYear(
return *this;
}

// Queues a WEEK_OF_MONTH token that carries 'minDigits' as its minimum digit
// count, then hands the builder back so calls can be chained.
DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfMonth(
    size_t minDigits) {
  FormatPattern weekOfMonthPattern{
      DateTimeFormatSpecifier::WEEK_OF_MONTH, minDigits};
  tokens_.emplace_back(std::move(weekOfMonthPattern));
  return *this;
}

DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendDayOfWeek0Based(
size_t minDigits) {
tokens_.emplace_back(
Expand Down
10 changes: 10 additions & 0 deletions velox/functions/lib/DateTimeFormatterBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ class DateTimeFormatterBuilder {
/// will be 001
DateTimeFormatterBuilder& appendWeekOfWeekYear(size_t minDigits);

/// Appends week of month to formatter builder, e.g: 2
///
/// \param minDigits describes the minimum number of digits this format is
/// required to represent week of month. The format by default is going to
/// use as few digits as possible greater than or equal to minDigits to
/// represent week of month. e.g. 1999-01-01, with min digit being 1 the
/// formatted result will be 1, with min digit being 4 the formatted result
/// will be 0001
/// \return a reference to this builder.
DateTimeFormatterBuilder& appendWeekOfMonth(size_t minDigits);

/// Appends day of week to formatter builder. The number is 0 based with 0 ~ 6
/// representing Sunday to Saturday respectively
///
Expand Down
124 changes: 124 additions & 0 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2317,4 +2317,128 @@ TEST_F(MysqlDateTimeTest, parseConsecutiveSpecifiers) {
EXPECT_THROW(parseMysql("1212", "%Y%H"), VeloxUserError);
}

/// Fixture for tests of formatters produced by buildSimpleDateTimeFormatter.
class SimpleDateTimeFormatterTest : public DateTimeFormatterTest {
 protected:
  /// Builds a simple formatter for 'format' (lenient or strict) and parses
  /// 'input' with it. The parse outcome is passed through dateTimeResult().
  DateTimeResult parseSimple(
      const std::string_view& input,
      const std::string_view& format,
      bool lenient) {
    const auto formatter = *buildSimpleDateTimeFormatter(format, lenient);
    auto parsed = formatter->parse(input);
    return dateTimeResult(parsed);
  }

  /// Formats 'timestamp' in 'timezone' with a simple formatter built from
  /// 'format' and returns the text trimmed to the number of characters the
  /// formatter reports having written.
  std::string formatSimpleDateTime(
      const std::string& format,
      const Timestamp& timestamp,
      const tz::TimeZone* timezone,
      bool lenient) const {
    const auto formatter =
        buildSimpleDateTimeFormatter(format, lenient).value();
    // Size the buffer for the worst case before formatting into it.
    const auto capacity = formatter->maxResultSize(timezone);
    std::string buffer(capacity, '\0');
    buffer.resize(
        formatter->format(timestamp, timezone, capacity, buffer.data()));
    return buffer;
  }
};

TEST_F(SimpleDateTimeFormatterTest, validSimpleBuild) {
  // A lone 'W' is expected to yield exactly one WEEK_OF_MONTH token with a
  // minimum digit count of 1, in lenient as well as strict mode.
  const std::vector<DateTimeToken> weekOfMonthOnly = {
      DateTimeToken(FormatPattern{DateTimeFormatSpecifier::WEEK_OF_MONTH, 1})};
  for (const bool lenient : {true, false}) {
    EXPECT_EQ(
        weekOfMonthOnly,
        (*buildSimpleDateTimeFormatter("W", lenient))->tokens());
  }
}

TEST_F(SimpleDateTimeFormatterTest, parseSimpleWeekOfMonth) {
  // One parse expectation: 'input' parsed with 'format' must give 'expected'.
  struct ParseCase {
    const char* expected;
    const char* input;
    const char* format;
  };
  // One rejection expectation: parsing 'input' with 'format' must throw.
  struct RejectCase {
    const char* input;
    const char* format;
  };

  // Inputs that must parse to the same timestamp in lenient and strict mode.
  const ParseCase commonCases[] = {
      // Format contains year, month, weekOfMonth and dayOfWeek.
      {"2024-08-02", "2024 08 01 5", "yyyy MM WW e"},
      {"2024-08-03", "2024 08 01 6", "yyyy MM WW e"},
      {"2024-08-09", "2024 08 02 5", "yyyy MM WW e"},
      {"2024-08-12", "2024 08 03 1", "yyyy MM WW e"},
      // Format contains year, month and weekOfMonth.
      {"2024-07-28", "2024 08 01", "yyyy MM WW"},
      // Format contains year and weekOfMonth.
      {"2023-12-31", "2024 01", "yyyy WW"},
      // Format contains weekOfMonth.
      {"1969-12-28", "1", "W"},
  };
  for (const bool lenient : {true, false}) {
    for (const auto& testCase : commonCases) {
      EXPECT_EQ(
          fromTimestampString(testCase.expected),
          parseSimple(testCase.input, testCase.format, lenient).timestamp);
    }
  }

  // Field out of range for lenient mode.
  const ParseCase lenientOutOfRangeCases[] = {
      {"2024-09-30", "2024 08 10 1", "yyyy MM WW e"},
      // NOTE(review): this input is in range and repeats a common case above,
      // while the strict-mode counterpart below uses "2024 08 10" -- confirm
      // whether week 10 was intended here.
      {"2024-07-28", "2024 08 01", "yyyy MM WW"},
      {"2025-02-24", "2024 15 01 1", "yyyy MM WW e"},
      {"2024-07-29", "2024 08 01 9", "yyyy MM WW e"},
  };
  for (const auto& testCase : lenientOutOfRangeCases) {
    EXPECT_EQ(
        fromTimestampString(testCase.expected),
        parseSimple(testCase.input, testCase.format, true).timestamp);
  }

  // Field out of range for strict mode.
  const RejectCase strictOutOfRangeCases[] = {
      {"2024 08 10 1", "yyyy MM WW e"},
      {"2024 08 10", "yyyy MM WW"},
      {"2024 15 01 1", "yyyy MM WW e"},
      {"2024 08 01 9", "yyyy MM WW e"},
  };
  for (const auto& testCase : strictOutOfRangeCases) {
    EXPECT_THROW(
        parseSimple(testCase.input, testCase.format, false), VeloxUserError);
  }
}

TEST_F(SimpleDateTimeFormatterTest, formatResultSize) {
  // "WW" must report a maximum result size of 2 in strict and lenient mode.
  for (const bool lenient : {false, true}) {
    const auto formatter = *buildSimpleDateTimeFormatter("WW", lenient);
    EXPECT_EQ(formatter->maxResultSize(nullptr), 2);
  }
}

TEST_F(SimpleDateTimeFormatterTest, formatWeekOfMonth) {
  // A date paired with the week-of-month text that "W" must produce for it.
  struct FormatCase {
    const char* date;
    const char* expected;
  };
  const FormatCase cases[] = {
      {"2024-08-01", "1"},
      {"2024-08-10", "2"},
      {"2024-08-11", "3"},
      {"2024-08-15", "3"},
      {"2024-08-30", "5"},
  };

  auto* timezone = tz::locateZone("GMT");
  for (const bool lenient : {true, false}) {
    for (const auto& testCase : cases) {
      EXPECT_EQ(
          formatSimpleDateTime(
              "W", fromTimestampString(testCase.date), timezone, lenient),
          testCase.expected);
    }
  }
}

} // namespace facebook::velox::functions

0 comments on commit a1d923e

Please sign in to comment.