Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for "week of month" in date parsing pattern #10511

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ struct Date {
int32_t dayOfYear = 1;
bool dayOfYearFormat = false;

int32_t weekOfMonth = 1;
bool weekOfMonthDateFormat = false;

bool centuryFormat = false;

bool isYearOfEra = false; // Year of era cannot be zero or negative.
Expand Down Expand Up @@ -627,6 +630,8 @@ std::string getSpecifierName(DateTimeFormatSpecifier specifier) {
return "TIMEZONE_OFFSET_ID";
case DateTimeFormatSpecifier::LITERAL_PERCENT:
return "LITERAL_PERCENT";
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
return "WEEK_OF_MONTH";
default: {
VELOX_UNREACHABLE("[Unexpected date format specifier]");
return ""; // Make compiler happy.
Expand Down Expand Up @@ -735,7 +740,6 @@ int32_t parseFromPattern(
return -1;
}
cur += size;
date.weekDateFormat = true;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why we need this change. Could you clarify what is covered by weekDateFormat, which seems to have a larger scope than weekOfMonthDateFormat?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

weekDateFormat has a different format from weekOfMonthDateFormat. weekDateFormat is composed of year, weekOfYear and dayOfWeek, whereas weekOfMonthDateFormat is composed of year, month, weekOfMonth and dayOfWeek. dayOfWeek is used by both weekDateFormat and weekOfMonthDateFormat, so dayOfWeek alone can't indicate that the format is weekDateFormat.

date.dayOfYearFormat = false;
if (!date.hasYear) {
date.hasYear = true;
Expand Down Expand Up @@ -873,6 +877,7 @@ int32_t parseFromPattern(
date.day = number;
date.weekDateFormat = false;
date.dayOfYearFormat = false;
date.weekOfMonthDateFormat = false;
// Joda has this weird behavior where it returns 1970 as the year by
// default (if no year is specified), but if either day or month are
// specified, it fallsback to 2000.
Expand All @@ -887,6 +892,7 @@ int32_t parseFromPattern(
date.dayOfYear = number;
date.dayOfYearFormat = true;
date.weekDateFormat = false;
date.weekOfMonthDateFormat = false;
// Joda has this weird behavior where it returns 1970 as the year by
// default (if no year is specified), but if either day or month are
// specified, it fallsback to 2000.
Expand Down Expand Up @@ -959,6 +965,7 @@ int32_t parseFromPattern(
date.weekDateFormat = true;
date.dayOfYearFormat = false;
date.centuryFormat = false;
date.weekOfMonthDateFormat = false;
date.hasYear = true;
break;

Expand All @@ -969,6 +976,7 @@ int32_t parseFromPattern(
date.week = number;
date.weekDateFormat = true;
date.dayOfYearFormat = false;
date.weekOfMonthDateFormat = false;
if (!date.hasYear) {
date.hasYear = true;
date.year = 2000;
Expand All @@ -980,13 +988,21 @@ int32_t parseFromPattern(
return -1;
}
date.dayOfWeek = number;
date.weekDateFormat = true;
date.dayOfYearFormat = false;
if (!date.hasYear) {
date.hasYear = true;
date.year = 2000;
}
break;
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
if (number < 1 || number > 5) {
return -1;
}
date.weekOfMonthDateFormat = true;
date.weekOfMonth = number;
date.weekDateFormat = false;
date.hasYear = true;
break;

default:
VELOX_NYI(
Expand Down Expand Up @@ -1069,6 +1085,7 @@ uint32_t DateTimeFormatter::maxResultSize(
// Not supported.
case DateTimeFormatSpecifier::WEEK_YEAR:
case DateTimeFormatSpecifier::WEEK_OF_WEEK_YEAR:
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
default:
VELOX_UNSUPPORTED(
"Date format specifier is not supported: {}",
Expand Down Expand Up @@ -1321,6 +1338,7 @@ int32_t DateTimeFormatter::format(
}
case DateTimeFormatSpecifier::WEEK_YEAR:
case DateTimeFormatSpecifier::WEEK_OF_WEEK_YEAR:
case DateTimeFormatSpecifier::WEEK_OF_MONTH:
default:
VELOX_UNSUPPORTED(
"format is not supported for specifier {}",
Expand Down Expand Up @@ -1423,6 +1441,13 @@ Expected<DateTimeResult> DateTimeFormatter::parse(
} else if (date.dayOfYearFormat) {
status = util::daysSinceEpochFromDayOfYear(
date.year, date.dayOfYear, daysSinceEpoch);
} else if (date.weekOfMonthDateFormat) {
status = util::daysSinceEpochFromWeekOfMonthDate(
date.year,
date.month,
date.weekOfMonth,
date.dayOfWeek,
daysSinceEpoch);
} else {
status = util::daysSinceEpochFromDate(
date.year, date.month, date.day, daysSinceEpoch);
Expand Down
5 changes: 4 additions & 1 deletion velox/functions/lib/DateTimeFormatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ enum class DateTimeFormatSpecifier : uint8_t {
TIMEZONE_OFFSET_ID = 22,

// A literal % character
LITERAL_PERCENT = 23
LITERAL_PERCENT = 23,

// Week of month, e.g: 2
WEEK_OF_MONTH = 24
};

struct FormatPattern {
Expand Down
7 changes: 7 additions & 0 deletions velox/functions/lib/DateTimeFormatterBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfWeekYear(
return *this;
}

DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfMonth(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this function be called in buildJodaDateTimeFormatter or buildMysqlDateTimeFormatter? Like https://github.com/facebookincubator/velox/blob/main/velox/functions/lib/DateTimeFormatter.cpp#L1463.

Copy link
Contributor Author

@NEUpanning NEUpanning Jul 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. "Week Of Month" format is not supported in Joda and Mysql.

size_t minDigits) {
tokens_.emplace_back(
FormatPattern{DateTimeFormatSpecifier::WEEK_OF_MONTH, minDigits});
return *this;
}

DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendDayOfWeek0Based(
size_t minDigits) {
tokens_.emplace_back(
Expand Down
10 changes: 10 additions & 0 deletions velox/functions/lib/DateTimeFormatterBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ class DateTimeFormatterBuilder {
/// will be 001
DateTimeFormatterBuilder& appendWeekOfWeekYear(size_t minDigits);

/// Appends a week-of-month field to the formatter builder, e.g: 2
///
/// NOTE: in DateTimeFormatter.cpp this specifier is only handled when
/// parsing, where values 1 through 5 are accepted; DateTimeFormatter::format
/// and DateTimeFormatter::maxResultSize report it as unsupported. The
/// formatting example below therefore describes the intended meaning of
/// minDigits rather than currently available output.
///
/// \param minDigits describes the minimum number of digits this format is
/// required to represent week of month. The format by default is going to
/// use as few digits as possible greater than or equal to minDigits to
/// represent week of month. e.g. 1999-01-01, with min digit being 1 the
/// formatted result will be 1, with min digit being 4 the formatted result
/// will be 0001
DateTimeFormatterBuilder& appendWeekOfMonth(size_t minDigits);

/// Appends day of week to formatter builder. The number is 0 based with 0 ~ 6
/// representing Sunday to Saturday respectively
///
Expand Down
31 changes: 31 additions & 0 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2304,4 +2304,35 @@ TEST_F(MysqlDateTimeTest, parseConsecutiveSpecifiers) {
EXPECT_THROW(parseMysql("1212", "%Y%H"), VeloxUserError);
}

/// Fixture for tests of the week-of-month specifier. It adds no members of
/// its own and only reuses what DateTimeFormatterTest provides.
class WeekOfMonthPatternTest : public DateTimeFormatterTest {};

// Parses inputs through formatters that contain the week-of-month specifier:
// first on its own, then combined with year, month and 1-based day of week.
TEST_F(WeekOfMonthPatternTest, parseWeekOfMonth) {
  // Formatter whose only token is week of month. The expected timestamps
  // show that the remaining fields resolve to the Monday of the requested
  // week of January 1970 (presumably the parser defaults; week 1 starts in
  // the previous month because it begins on a Monday).
  DateTimeFormatterBuilder weekOnlyBuilder(1);
  weekOnlyBuilder.appendWeekOfMonth(1);
  std::shared_ptr<DateTimeFormatter> weekOnlyFormatter =
      weekOnlyBuilder.setType(DateTimeFormatterType::JODA).build();
  EXPECT_EQ(
      fromTimestampString("1969-12-29 00:00:00"),
      weekOnlyFormatter->parse("1")->timestamp);
  EXPECT_EQ(
      fromTimestampString("1970-01-12 00:00:00"),
      weekOnlyFormatter->parse("3")->timestamp);

  // Formatter for "<year> <month> <week of month> <day of week>".
  DateTimeFormatterBuilder fullDateBuilder(11);
  fullDateBuilder.appendYear(4);
  fullDateBuilder.appendLiteral(" ");
  fullDateBuilder.appendMonthOfYear(2);
  fullDateBuilder.appendLiteral(" ");
  fullDateBuilder.appendWeekOfMonth(1);
  fullDateBuilder.appendLiteral(" ");
  fullDateBuilder.appendDayOfWeek1Based(1);
  std::shared_ptr<DateTimeFormatter> fullDateFormatter =
      fullDateBuilder.setType(DateTimeFormatterType::JODA).build();
  // Week 1, day 1 of December 1999 falls in the preceding November.
  EXPECT_EQ(
      fromTimestampString("1999-11-29 00:00:00"),
      fullDateFormatter->parse("1999 12 1 1")->timestamp);
  // Week 3, day 2 of December 1999.
  EXPECT_EQ(
      fromTimestampString("1999-12-14 00:00:00"),
      fullDateFormatter->parse("1999 12 3 2")->timestamp);
}
} // namespace facebook::velox::functions
40 changes: 40 additions & 0 deletions velox/type/TimestampConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,26 @@ bool isValidWeekDate(int32_t weekYear, int32_t weekOfYear, int32_t dayOfWeek) {
return true;
}

bool isValidWeekOfMonthDate(
int32_t year,
int32_t month,
int32_t weekOfMonth,
int32_t dayOfWeek) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add some unit tests for this function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added tests in e6d51ae

if (dayOfWeek < 1 || dayOfWeek > 7) {
return false;
}
if (weekOfMonth < 1 || weekOfMonth > 5) {
return false;
}
if (month < 1 || month > 12) {
return false;
}
if (year < kMinYear || year > kMaxYear) {
return false;
}
return true;
}

inline bool validDate(int64_t daysSinceEpoch) {
return daysSinceEpoch >= std::numeric_limits<int32_t>::min() &&
daysSinceEpoch <= std::numeric_limits<int32_t>::max();
Expand Down Expand Up @@ -593,6 +613,26 @@ Status daysSinceEpochFromWeekDate(
return Status::OK();
}

Status daysSinceEpochFromWeekOfMonthDate(
int32_t year,
int32_t month,
int32_t weekOfMonth,
int32_t dayOfWeek,
int64_t& out) {
if (!isValidWeekOfMonthDate(year, month, weekOfMonth, dayOfWeek)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isValidWeekOfMonthDate doesn't perform full validation. For example, it allows weekOfMonth = 5 and dayOfWeek = 6, but these may not exist for all months. Where / how do we handle these cases of invalid inputs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have tested SimpleDateFormat and I see it can parse the "invalid" weekOfMonth and dayOfWeek etc. Perhaps we should remove the validation to align with SimpleDateFormat. Here is an example:

SimpleDateFormat sdf = new SimpleDateFormat("y W");
sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
Date date = sdf.parse("2024 1");
System.out.println(date);
date = sdf.parse("2024 5");
System.out.println(date);
// invalid week of month
date = sdf.parse("2024 99");
System.out.println(date);

-------------console output--------------
Sun Dec 31 00:00:00 CST 2023
Sun Jan 28 00:00:00 CST 2024
Sun Nov 16 00:00:00 CST 2025

return Status::UserError(
"Date out of range: {}-{}-{}-{}", year, month, weekOfMonth, dayOfWeek);
}
int64_t daysSinceEpochOfFirstDayOfMonth;
VELOX_RETURN_NOT_OK(
daysSinceEpochFromDate(year, month, 1, daysSinceEpochOfFirstDayOfMonth));
int32_t firstDayOfWeek =
extractISODayOfTheWeek(daysSinceEpochOfFirstDayOfMonth);
out = daysSinceEpochOfFirstDayOfMonth - (firstDayOfWeek - 1) +
7 * (weekOfMonth - 1) + dayOfWeek - 1;
return Status::OK();
}

Status
daysSinceEpochFromDayOfYear(int32_t year, int32_t dayOfYear, int64_t& out) {
if (!isValidDayOfYear(year, dayOfYear)) {
Expand Down
9 changes: 9 additions & 0 deletions velox/type/TimestampConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,15 @@ Status daysSinceEpochFromWeekDate(
int32_t dayOfWeek,
int64_t& out);

/// Computes the (signed) number of days since unix epoch (1970-01-01).
/// Returns UserError status if the date is invalid.
Status daysSinceEpochFromWeekOfMonthDate(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps, extract this function into a separate PR and add a unit test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The DateTimeFormatter.parse function depends on this function. Do you mean to submit all changes to the DateTimeFormatter.parse function and this function as a separate PR?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this comment suggests opening another PR that contains the implementation and tests for the daysSinceEpochFromWeekOfMonthDate function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened a PR for this. #10604

int32_t year,
int32_t month,
int32_t weekOfMonth,
int32_t dayOfWeek,
int64_t& out);

/// Computes the (signed) number of days since unix epoch (1970-01-01).
/// Returns UserError status if the date is invalid.
Status
Expand Down
29 changes: 29 additions & 0 deletions velox/type/tests/TimestampConversionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,35 @@ TEST(DateTimeUtilTest, fromDateInvalid) {
1970, 6, 31, "Date out of range: 1970-6-31"));
}

// Checks util::daysSinceEpochFromWeekOfMonthDate for dates after and before
// the unix epoch, including year 0 and a negative year.
TEST(DateTimeUtilTest, fromWeekOfMonthDate) {
  // Converts (year, month, weekOfMonth, dayOfWeek) to days since epoch and
  // records a test failure if the conversion does not return an OK status.
  auto testDaysSinceEpochFromWeekOfMonthDate =
      [](int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) {
        // Initialized so that a non-OK status, which may leave the
        // out-parameter untouched, never makes the caller compare against an
        // indeterminate value.
        int64_t daysSinceEpoch = 0;
        auto status = util::daysSinceEpochFromWeekOfMonthDate(
            year, month, weekOfMonth, dayOfWeek, daysSinceEpoch);
        EXPECT_TRUE(status.ok());
        return daysSinceEpoch;
      };
  EXPECT_EQ(4, testDaysSinceEpochFromWeekOfMonthDate(1970, 1, 2, 1));
  EXPECT_EQ(361, testDaysSinceEpochFromWeekOfMonthDate(1971, 1, 1, 1));
  EXPECT_EQ(396, testDaysSinceEpochFromWeekOfMonthDate(1971, 2, 1, 1));

  EXPECT_EQ(10952, testDaysSinceEpochFromWeekOfMonthDate(2000, 1, 1, 1));
  EXPECT_EQ(19905, testDaysSinceEpochFromWeekOfMonthDate(2024, 7, 1, 1));

  // Before unix epoch. Week 1 can begin in the previous month, so the first
  // week of January 1970 already yields negative values.
  EXPECT_EQ(-3, testDaysSinceEpochFromWeekOfMonthDate(1970, 1, 1, 1));
  EXPECT_EQ(-2, testDaysSinceEpochFromWeekOfMonthDate(1970, 1, 1, 2));
  EXPECT_EQ(-31, testDaysSinceEpochFromWeekOfMonthDate(1969, 12, 1, 1));
  EXPECT_EQ(-367, testDaysSinceEpochFromWeekOfMonthDate(1969, 1, 1, 1));
  EXPECT_EQ(-724, testDaysSinceEpochFromWeekOfMonthDate(1968, 1, 2, 1));
  EXPECT_EQ(-719533, testDaysSinceEpochFromWeekOfMonthDate(0, 1, 1, 1));

  // Negative year - BC.
  EXPECT_EQ(-719561, testDaysSinceEpochFromWeekOfMonthDate(-1, 12, 1, 1));
  EXPECT_EQ(-719897, testDaysSinceEpochFromWeekOfMonthDate(-1, 1, 1, 1));
}

TEST(DateTimeUtilTest, fromDateString) {
for (ParseMode mode : {ParseMode::kPrestoCast, ParseMode::kSparkCast}) {
EXPECT_EQ(0, parseDate("1970-01-01", mode));
Expand Down
Loading