Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for "week of month" in SimpleDateTimeFormatter #11103

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 60 additions & 6 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,14 @@ struct Date {
int32_t dayOfYear = 1;
bool dayOfYearFormat = false;

int32_t weekOfMonth = 1;
bool weekOfMonthDateFormat = false;

bool centuryFormat = false;

bool isYearOfEra = false; // Year of era cannot be zero or negative.
bool hasYear = false; // Whether year was explicitly specified.
bool hasDayOfWeek = false; // Whether dayOfWeek was explicitly specified.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated to this PR, but maybe we should use std::optional to prevent these flags controlling what has been explicitly set and what not.

Copy link
Contributor Author

@NEUpanning NEUpanning Oct 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your suggestion! Using std::optional would be a more elegant solution to avoid using boolean flags to control whether a value has been explicitly set. I'd like to work on it. Should I open an issue for this first?


int32_t hour = 0;
int32_t minute = 0;
Expand Down Expand Up @@ -471,6 +475,7 @@ int64_t parseDayOfWeekText(const char* cur, const char* end, Date& date) {
auto it = dayOfWeekMap.find(std::string_view(cur, 3));
if (it != dayOfWeekMap.end()) {
date.dayOfWeek = it->second.second;
date.hasDayOfWeek = true;
if (end - cur >= it->second.first.size() + 3) {
if (std::strncmp(
cur + 3, it->second.first.data(), it->second.first.size()) ==
Expand Down Expand Up @@ -627,6 +632,8 @@ std::string_view getSpecifierName(DateTimeFormatSpecifier specifier) {
return "TIMEZONE_OFFSET_ID";
case DateTimeFormatSpecifier::LITERAL_PERCENT:
return "LITERAL_PERCENT";
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
return "WEEK_OF_MONTH";
default: {
VELOX_UNREACHABLE("[Unexpected date format specifier]");
return ""; // Make compiler happy.
Expand All @@ -643,6 +650,7 @@ int getMaxDigitConsume(
case DateTimeFormatSpecifier::CENTURY_OF_ERA:
case DateTimeFormatSpecifier::DAY_OF_WEEK_1_BASED:
case DateTimeFormatSpecifier::FRACTION_OF_SECOND:
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
return curPattern.minRepresentDigits;

case DateTimeFormatSpecifier::YEAR_OF_ERA:
Expand Down Expand Up @@ -735,7 +743,9 @@ int32_t parseFromPattern(
return -1;
}
cur += size;
date.weekDateFormat = true;
if (!date.weekOfMonthDateFormat) {
date.weekDateFormat = true;
}
date.dayOfYearFormat = false;
if (!date.hasYear) {
date.hasYear = true;
Expand Down Expand Up @@ -853,8 +863,10 @@ int32_t parseFromPattern(
break;

case DateTimeFormatSpecifier::MONTH_OF_YEAR:
if (number < 1 || number > 12) {
return -1;
if (type != DateTimeFormatterType::LENIENT_SIMPLE) {
if (number < 1 || number > 12) {
return -1;
}
}
date.month = number;
date.weekDateFormat = false;
Expand All @@ -873,6 +885,7 @@ int32_t parseFromPattern(
date.day = number;
date.weekDateFormat = false;
date.dayOfYearFormat = false;
date.weekOfMonthDateFormat = false;
// Joda has this weird behavior where it returns 1970 as the year by
// default (if no year is specified), but if either day or month are
// specified, it fallsback to 2000.
Expand All @@ -887,6 +900,7 @@ int32_t parseFromPattern(
date.dayOfYear = number;
date.dayOfYearFormat = true;
date.weekDateFormat = false;
date.weekOfMonthDateFormat = false;
// Joda has this weird behavior where it returns 1970 as the year by
// default (if no year is specified), but if either day or month are
// specified, it fallsback to 2000.
Expand Down Expand Up @@ -959,6 +973,7 @@ int32_t parseFromPattern(
date.weekDateFormat = true;
date.dayOfYearFormat = false;
date.centuryFormat = false;
date.weekOfMonthDateFormat = false;
date.hasYear = true;
break;

Expand All @@ -969,24 +984,40 @@ int32_t parseFromPattern(
date.week = number;
date.weekDateFormat = true;
date.dayOfYearFormat = false;
date.weekOfMonthDateFormat = false;
if (!date.hasYear) {
date.hasYear = true;
date.year = 2000;
}
break;

case DateTimeFormatSpecifier::DAY_OF_WEEK_1_BASED:
if (number < 1 || number > 7) {
return -1;
if (type != DateTimeFormatterType::LENIENT_SIMPLE) {
if (number < 1 || number > 7) {
return -1;
}
}
date.dayOfWeek = number;
date.weekDateFormat = true;
date.hasDayOfWeek = true;
if (!date.weekOfMonthDateFormat) {
date.weekDateFormat = true;
}
date.dayOfYearFormat = false;
if (!date.hasYear) {
date.hasYear = true;
date.year = 2000;
}
break;
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
date.weekOfMonthDateFormat = true;
date.weekOfMonth = number;
date.weekDateFormat = false;
date.hasYear = true;
// For week of month date format, the default value of dayOfWeek is 7.
if (!date.hasDayOfWeek) {
date.dayOfWeek = 7;
}
break;

default:
VELOX_NYI(
Expand Down Expand Up @@ -1018,6 +1049,7 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const {
break;
case DateTimeFormatSpecifier::DAY_OF_WEEK_0_BASED:
case DateTimeFormatSpecifier::DAY_OF_WEEK_1_BASED:
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
size += std::max((int)token.pattern.minRepresentDigits, 1);
break;
case DateTimeFormatSpecifier::DAY_OF_WEEK_TEXT:
Expand Down Expand Up @@ -1328,6 +1360,18 @@ int32_t DateTimeFormatter::format(
result);
break;
}
case DateTimeFormatSpecifier::WEEK_OF_MONTH: {
result += padContent(
ceil(
(7 + static_cast<unsigned>(calDate.day()) -
weekday.c_encoding() - 1) /
7.0),
'0',
token.pattern.minRepresentDigits,
maxResultEnd,
result);
break;
}
case DateTimeFormatSpecifier::WEEK_YEAR:
default:
VELOX_UNSUPPORTED(
Expand Down Expand Up @@ -1430,6 +1474,13 @@ Expected<DateTimeResult> DateTimeFormatter::parse(
} else if (date.dayOfYearFormat) {
daysSinceEpoch =
util::daysSinceEpochFromDayOfYear(date.year, date.dayOfYear);
} else if (date.weekOfMonthDateFormat) {
daysSinceEpoch = util::daysSinceEpochFromWeekOfMonthDate(
date.year,
date.month,
date.weekOfMonth,
date.dayOfWeek,
this->type_ == DateTimeFormatterType::LENIENT_SIMPLE);
} else {
daysSinceEpoch =
util::daysSinceEpochFromDate(date.year, date.month, date.day);
Expand Down Expand Up @@ -1792,6 +1843,9 @@ std::shared_ptr<DateTimeFormatter> buildSimpleDateTimeFormatter(
case 'w':
builder.appendWeekOfWeekYear(count);
break;
case 'W':
builder.appendWeekOfMonth(count);
break;
case 'x':
builder.appendWeekYear(count);
break;
Expand Down
5 changes: 4 additions & 1 deletion velox/functions/lib/DateTimeFormatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ enum class DateTimeFormatSpecifier : uint8_t {
TIMEZONE_OFFSET_ID = 22,

// A literal % character
LITERAL_PERCENT = 23
LITERAL_PERCENT = 23,

// Week of month based on java.text.SimpleDateFormat, e.g: 2
WEEK_OF_MONTH = 24
};

struct FormatPattern {
Expand Down
7 changes: 7 additions & 0 deletions velox/functions/lib/DateTimeFormatterBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfWeekYear(
return *this;
}

DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfMonth(
size_t minDigits) {
tokens_.emplace_back(
FormatPattern{DateTimeFormatSpecifier::WEEK_OF_MONTH, minDigits});
return *this;
}

DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendDayOfWeek0Based(
size_t minDigits) {
tokens_.emplace_back(
Expand Down
10 changes: 10 additions & 0 deletions velox/functions/lib/DateTimeFormatterBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ class DateTimeFormatterBuilder {
/// will be 001
DateTimeFormatterBuilder& appendWeekOfWeekYear(size_t minDigits);

/// Appends week of month to formatter builder, e.g: 2
///
/// \param minDigits describes the minimum number of digits this format is
/// required to represent week of month. The format by default is going
/// use as few digits as possible greater than or equal to minDigits to
/// represent week of month. e.g. 1999-01-01, with min digit being 1 the
/// formatted result will be 1, with min digit being 4 the formatted result
/// will be 0001
DateTimeFormatterBuilder& appendWeekOfMonth(size_t minDigits);

/// Appends day of week to formatter builder. The number is 0 based with 0 ~ 6
/// representing Sunday to Saturday respectively
///
Expand Down
124 changes: 124 additions & 0 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2303,4 +2303,128 @@ TEST_F(MysqlDateTimeTest, parseConsecutiveSpecifiers) {
EXPECT_THROW(parseMysql("1212", "%Y%H"), VeloxUserError);
}

class SimpleDateTimeFormatterTest : public DateTimeFormatterTest {
protected:
DateTimeResult parseSimple(
const std::string_view& input,
const std::string_view& format,
bool lenient) {
auto dateTimeResultExpected =
buildSimpleDateTimeFormatter(format, lenient)->parse(input);
return dateTimeResult(dateTimeResultExpected);
}

std::string formatSimpleDateTime(
const std::string& format,
const Timestamp& timestamp,
const tz::TimeZone* timezone,
bool lenient) const {
auto formatter = buildSimpleDateTimeFormatter(format, lenient);
const auto maxSize = formatter->maxResultSize(timezone);
std::string result(maxSize, '\0');
auto resultSize =
formatter->format(timestamp, timezone, maxSize, result.data());
result.resize(resultSize);
return result;
}
};

TEST_F(SimpleDateTimeFormatterTest, validSimpleBuild) {
// W specifier case.
std::vector<DateTimeToken> expected = {
DateTimeToken(FormatPattern{DateTimeFormatSpecifier::WEEK_OF_MONTH, 1})};
EXPECT_EQ(expected, buildSimpleDateTimeFormatter("W", true)->tokens());
EXPECT_EQ(expected, buildSimpleDateTimeFormatter("W", false)->tokens());
}

TEST_F(SimpleDateTimeFormatterTest, parseSimpleWeekOfMonth) {
// Common cases for lenient and strict mode.
for (bool lenient : {true, false}) {
// Format contains year, month, weekOfMonth and dayOfWeek.
EXPECT_EQ(
fromTimestampString("2024-08-02"),
parseSimple("2024 08 01 5", "yyyy MM WW e", lenient).timestamp);
EXPECT_EQ(
fromTimestampString("2024-08-03"),
parseSimple("2024 08 01 6", "yyyy MM WW e", lenient).timestamp);
EXPECT_EQ(
fromTimestampString("2024-08-09"),
parseSimple("2024 08 02 5", "yyyy MM WW e", lenient).timestamp);
EXPECT_EQ(
fromTimestampString("2024-08-12"),
parseSimple("2024 08 03 1", "yyyy MM WW e", lenient).timestamp);

// Format contains year, month and weekOfMonth.
EXPECT_EQ(
fromTimestampString("2024-07-28"),
parseSimple("2024 08 01", "yyyy MM WW", lenient).timestamp);

// Format contains year and weekOfMonth.
EXPECT_EQ(
fromTimestampString("2023-12-31"),
parseSimple("2024 01", "yyyy WW", lenient).timestamp);

// Format contains weekOfMonth.
EXPECT_EQ(
fromTimestampString("1969-12-28"),
parseSimple("1", "W", lenient).timestamp);
}

// Field out of range for lenient mode.
EXPECT_EQ(
fromTimestampString("2024-09-30"),
parseSimple("2024 08 10 1", "yyyy MM WW e", true).timestamp);
EXPECT_EQ(
fromTimestampString("2024-07-28"),
parseSimple("2024 08 01", "yyyy MM WW", true).timestamp);
EXPECT_EQ(
fromTimestampString("2025-02-24"),
parseSimple("2024 15 01 1", "yyyy MM WW e", true).timestamp);
EXPECT_EQ(
fromTimestampString("2024-07-29"),
parseSimple("2024 08 01 9", "yyyy MM WW e", true).timestamp);

// Field out of range for strict mode.
EXPECT_THROW(
parseSimple("2024 08 10 1", "yyyy MM WW e", false), VeloxUserError);
EXPECT_THROW(parseSimple("2024 08 10", "yyyy MM WW", false), VeloxUserError);
EXPECT_THROW(
parseSimple("2024 15 01 1", "yyyy MM WW e", false), VeloxUserError);
EXPECT_THROW(
parseSimple("2024 08 01 9", "yyyy MM WW e", false), VeloxUserError);
}

TEST_F(SimpleDateTimeFormatterTest, formatResultSize) {
EXPECT_EQ(
buildSimpleDateTimeFormatter("WW", false)->maxResultSize(nullptr), 2);
EXPECT_EQ(
buildSimpleDateTimeFormatter("WW", true)->maxResultSize(nullptr), 2);
}

TEST_F(SimpleDateTimeFormatterTest, formatWeekOfMonth) {
auto* timezone = tz::locateZone("GMT");
for (bool lenient : {true, false}) {
EXPECT_EQ(
formatSimpleDateTime(
"W", fromTimestampString("2024-08-01"), timezone, lenient),
"1");
EXPECT_EQ(
formatSimpleDateTime(
"W", fromTimestampString("2024-08-10"), timezone, lenient),
"2");
EXPECT_EQ(
formatSimpleDateTime(
"W", fromTimestampString("2024-08-11"), timezone, lenient),
"3");
EXPECT_EQ(
formatSimpleDateTime(
"W", fromTimestampString("2024-08-15"), timezone, lenient),
"3");
EXPECT_EQ(
formatSimpleDateTime(
"W", fromTimestampString("2024-08-30"), timezone, lenient),
"5");
}
}

} // namespace facebook::velox::functions
Loading