From 98d52c15b1c0c96c33490825b541a3af770efc73 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Fri, 19 Jul 2024 20:20:52 +0800 Subject: [PATCH 1/4] support week of month --- velox/functions/lib/DateTimeFormatter.cpp | 25 +++++++++++-- velox/functions/lib/DateTimeFormatter.h | 5 ++- .../lib/DateTimeFormatterBuilder.cpp | 7 ++++ .../functions/lib/DateTimeFormatterBuilder.h | 10 ++++++ velox/type/TimestampConversion.cpp | 36 +++++++++++++++++++ velox/type/TimestampConversion.h | 9 +++++ 6 files changed, 89 insertions(+), 3 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 3dd1e640f858..f62465a60d23 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -46,6 +46,9 @@ struct Date { int32_t dayOfYear = 1; bool dayOfYearFormat = false; + int32_t weekOfMonth = 1; + bool weekOfMonthDateFormat = false; + bool centuryFormat = false; bool isYearOfEra = false; // Year of era cannot be zero or negative. @@ -627,6 +630,8 @@ std::string getSpecifierName(DateTimeFormatSpecifier specifier) { return "TIMEZONE_OFFSET_ID"; case DateTimeFormatSpecifier::LITERAL_PERCENT: return "LITERAL_PERCENT"; + case DateTimeFormatSpecifier::WEEK_OF_MONTH: + return "WEEK_OF_MONTH"; default: { VELOX_UNREACHABLE("[Unexpected date format specifier]"); return ""; // Make compiler happy. @@ -735,7 +740,6 @@ int32_t parseFromPattern( return -1; } cur += size; - date.weekDateFormat = true; date.dayOfYearFormat = false; if (!date.hasYear) { date.hasYear = true; @@ -873,6 +877,7 @@ int32_t parseFromPattern( date.day = number; date.weekDateFormat = false; date.dayOfYearFormat = false; + date.weekOfMonthDateFormat = false; // Joda has this weird behavior where it returns 1970 as the year by // default (if no year is specified), but if either day or month are // specified, it fallsback to 2000. 
@@ -887,6 +892,7 @@ int32_t parseFromPattern( date.dayOfYear = number; date.dayOfYearFormat = true; date.weekDateFormat = false; + date.weekOfMonthDateFormat = false; // Joda has this weird behavior where it returns 1970 as the year by // default (if no year is specified), but if either day or month are // specified, it fallsback to 2000. @@ -959,6 +965,7 @@ int32_t parseFromPattern( date.weekDateFormat = true; date.dayOfYearFormat = false; date.centuryFormat = false; + date.weekOfMonthDateFormat = false; date.hasYear = true; break; @@ -969,6 +976,7 @@ int32_t parseFromPattern( date.week = number; date.weekDateFormat = true; date.dayOfYearFormat = false; + date.weekOfMonthDateFormat = false; if (!date.hasYear) { date.hasYear = true; date.year = 2000; @@ -980,13 +988,21 @@ int32_t parseFromPattern( return -1; } date.dayOfWeek = number; - date.weekDateFormat = true; date.dayOfYearFormat = false; if (!date.hasYear) { date.hasYear = true; date.year = 2000; } break; + case DateTimeFormatSpecifier::WEEK_OF_MONTH: + if (number < 1 || number > 5) { + return -1; + } + date.weekOfMonthDateFormat = true; + date.weekOfMonth = number; + date.weekDateFormat = false; + date.hasYear = true; + break; default: VELOX_NYI( @@ -1069,6 +1085,7 @@ uint32_t DateTimeFormatter::maxResultSize( // Not supported. 
case DateTimeFormatSpecifier::WEEK_YEAR: case DateTimeFormatSpecifier::WEEK_OF_WEEK_YEAR: + case DateTimeFormatSpecifier::WEEK_OF_MONTH: default: VELOX_UNSUPPORTED( "Date format specifier is not supported: {}", @@ -1321,6 +1338,7 @@ int32_t DateTimeFormatter::format( } case DateTimeFormatSpecifier::WEEK_YEAR: case DateTimeFormatSpecifier::WEEK_OF_WEEK_YEAR: + case DateTimeFormatSpecifier::WEEK_OF_MONTH: default: VELOX_UNSUPPORTED( "format is not supported for specifier {}", @@ -1423,6 +1441,9 @@ Expected DateTimeFormatter::parse( } else if (date.dayOfYearFormat) { status = util::daysSinceEpochFromDayOfYear( date.year, date.dayOfYear, daysSinceEpoch); + } else if (date.weekOfMonthDateFormat) { + status = util::daysSinceEpochFromWeekOfMonthDate( + date.year, date.month, date.weekOfMonth, date.dayOfWeek, daysSinceEpoch); } else { status = util::daysSinceEpochFromDate( date.year, date.month, date.day, daysSinceEpoch); diff --git a/velox/functions/lib/DateTimeFormatter.h b/velox/functions/lib/DateTimeFormatter.h index 32f3e38ba855..c855bf158764 100644 --- a/velox/functions/lib/DateTimeFormatter.h +++ b/velox/functions/lib/DateTimeFormatter.h @@ -100,7 +100,10 @@ enum class DateTimeFormatSpecifier : uint8_t { TIMEZONE_OFFSET_ID = 22, // A literal % character - LITERAL_PERCENT = 23 + LITERAL_PERCENT = 23, + + // Week of month, e.g: 2 + WEEK_OF_MONTH = 24 }; struct FormatPattern { diff --git a/velox/functions/lib/DateTimeFormatterBuilder.cpp b/velox/functions/lib/DateTimeFormatterBuilder.cpp index 5fa2b0bacdcd..aa80058ac8f3 100644 --- a/velox/functions/lib/DateTimeFormatterBuilder.cpp +++ b/velox/functions/lib/DateTimeFormatterBuilder.cpp @@ -56,6 +56,13 @@ DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfWeekYear( return *this; } +DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendWeekOfMonth( + size_t minDigits) { + tokens_.emplace_back( + FormatPattern{DateTimeFormatSpecifier::WEEK_OF_MONTH, minDigits}); + return *this; +} + 
DateTimeFormatterBuilder& DateTimeFormatterBuilder::appendDayOfWeek0Based( size_t minDigits) { tokens_.emplace_back( diff --git a/velox/functions/lib/DateTimeFormatterBuilder.h b/velox/functions/lib/DateTimeFormatterBuilder.h index b5e27d1103ca..aa508869432e 100644 --- a/velox/functions/lib/DateTimeFormatterBuilder.h +++ b/velox/functions/lib/DateTimeFormatterBuilder.h @@ -88,6 +88,16 @@ class DateTimeFormatterBuilder { /// will be 001 DateTimeFormatterBuilder& appendWeekOfWeekYear(size_t minDigits); + /// Appends week of month to formatter builder, e.g: 2 + /// + /// \param minDigits describes the minimum number of digits this format is + /// required to represent week of month. The format by default is going to + /// use as few digits as possible greater than or equal to minDigits to + /// represent week of month. e.g. 1999-01-01, with min digit being 1 the + /// formatted result will be 1, with min digit being 4 the formatted result + /// will be 0001 + DateTimeFormatterBuilder& appendWeekOfMonth(size_t minDigits); + /// Appends day of week to formatter builder.
The number is 0 based with 0 ~ 6 /// representing Sunday to Saturday respectively /// diff --git a/velox/type/TimestampConversion.cpp b/velox/type/TimestampConversion.cpp index e5c5e3ce5f3d..c259cc1e87e5 100644 --- a/velox/type/TimestampConversion.cpp +++ b/velox/type/TimestampConversion.cpp @@ -144,6 +144,22 @@ bool isValidWeekDate(int32_t weekYear, int32_t weekOfYear, int32_t dayOfWeek) { return true; } +bool isValidWeekOfMonthDate(int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) { + if (dayOfWeek < 1 || dayOfWeek > 7) { + return false; + } + if (weekOfMonth < 1 || weekOfMonth > 5) { + return false; + } + if (month < 1 || month > 12) { + return false; + } + if (year < kMinYear || year > kMaxYear) { + return false; + } + return true; +} + inline bool validDate(int64_t daysSinceEpoch) { return daysSinceEpoch >= std::numeric_limits::min() && daysSinceEpoch <= std::numeric_limits::max(); @@ -593,6 +609,26 @@ Status daysSinceEpochFromWeekDate( return Status::OK(); } +Status daysSinceEpochFromWeekOfMonthDate( + int32_t year, + int32_t month, + int32_t weekOfMonth, + int32_t dayOfWeek, + int64_t& out) { + if (!isValidWeekOfMonthDate(year, month, weekOfMonth, dayOfWeek)) { + return Status::UserError( + "Date out of range: {}-{}-{}-{}", year, month, weekOfMonth, dayOfWeek); + } + int64_t daysSinceEpochOfFirstDayOfMonth; + VELOX_RETURN_NOT_OK( + daysSinceEpochFromDate(year, month, 1, daysSinceEpochOfFirstDayOfMonth)); + int32_t firstDayOfWeek = + extractISODayOfTheWeek(daysSinceEpochOfFirstDayOfMonth); + out = daysSinceEpochOfFirstDayOfMonth - (firstDayOfWeek - 1) + + 7 * (weekOfMonth - 1) + dayOfWeek - 1; + return Status::OK(); +} + Status daysSinceEpochFromDayOfYear(int32_t year, int32_t dayOfYear, int64_t& out) { if (!isValidDayOfYear(year, dayOfYear)) { diff --git a/velox/type/TimestampConversion.h b/velox/type/TimestampConversion.h index 5f5e04cfcfe3..491645809a71 100644 --- a/velox/type/TimestampConversion.h +++ b/velox/type/TimestampConversion.h 
@@ -107,6 +107,15 @@ Status daysSinceEpochFromWeekDate( int32_t dayOfWeek, int64_t& out); +/// Computes the (signed) number of days since unix epoch (1970-01-01). +/// Returns UserError status if the date is invalid. +Status daysSinceEpochFromWeekOfMonthDate( + int32_t year, + int32_t month, + int32_t weekOfMonth, + int32_t dayOfWeek, + int64_t& out); + /// Computes the (signed) number of days since unix epoch (1970-01-01). /// Returns UserError status if the date is invalid. Status From a76c50b07a6303baba6c7e9d2224230241cc273d Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Mon, 22 Jul 2024 10:13:56 +0800 Subject: [PATCH 2/4] fix format --- velox/functions/lib/DateTimeFormatter.cpp | 6 +++++- velox/type/TimestampConversion.cpp | 8 ++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index f62465a60d23..1b3226d9f04d 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -1443,7 +1443,11 @@ Expected DateTimeFormatter::parse( date.year, date.dayOfYear, daysSinceEpoch); } else if (date.weekOfMonthDateFormat) { status = util::daysSinceEpochFromWeekOfMonthDate( - date.year, date.month, date.weekOfMonth, date.dayOfWeek, daysSinceEpoch); + date.year, + date.month, + date.weekOfMonth, + date.dayOfWeek, + daysSinceEpoch); } else { status = util::daysSinceEpochFromDate( date.year, date.month, date.day, daysSinceEpoch); diff --git a/velox/type/TimestampConversion.cpp b/velox/type/TimestampConversion.cpp index c259cc1e87e5..33cf73a11694 100644 --- a/velox/type/TimestampConversion.cpp +++ b/velox/type/TimestampConversion.cpp @@ -144,7 +144,11 @@ bool isValidWeekDate(int32_t weekYear, int32_t weekOfYear, int32_t dayOfWeek) { return true; } -bool isValidWeekOfMonthDate(int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) { +bool isValidWeekOfMonthDate( + int32_t year, + int32_t month, + int32_t weekOfMonth, + 
int32_t dayOfWeek) { if (dayOfWeek < 1 || dayOfWeek > 7) { return false; } @@ -625,7 +629,7 @@ Status daysSinceEpochFromWeekOfMonthDate( int32_t firstDayOfWeek = extractISODayOfTheWeek(daysSinceEpochOfFirstDayOfMonth); out = daysSinceEpochOfFirstDayOfMonth - (firstDayOfWeek - 1) + - 7 * (weekOfMonth - 1) + dayOfWeek - 1; + 7 * (weekOfMonth - 1) + dayOfWeek - 1; return Status::OK(); } From 7ec5bf492ddff8cd8f01992e0a22eb3818eb1457 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Tue, 23 Jul 2024 17:27:54 +0800 Subject: [PATCH 3/4] add unit tests --- .../lib/tests/DateTimeFormatterTest.cpp | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/velox/functions/lib/tests/DateTimeFormatterTest.cpp b/velox/functions/lib/tests/DateTimeFormatterTest.cpp index c25b6fb5cd0d..b6bab8d1a76f 100644 --- a/velox/functions/lib/tests/DateTimeFormatterTest.cpp +++ b/velox/functions/lib/tests/DateTimeFormatterTest.cpp @@ -2304,4 +2304,35 @@ TEST_F(MysqlDateTimeTest, parseConsecutiveSpecifiers) { EXPECT_THROW(parseMysql("1212", "%Y%H"), VeloxUserError); } +class WeekOfMonthPatternTest : public DateTimeFormatterTest {}; + +TEST_F(WeekOfMonthPatternTest, parseWeekOfMonth) { + DateTimeFormatterBuilder builder1(1); + builder1.appendWeekOfMonth(1); + std::shared_ptr formatter1 = + builder1.setType(DateTimeFormatterType::JODA).build(); + EXPECT_EQ( + fromTimestampString("1969-12-29 00:00:00"), + formatter1->parse("1")->timestamp); + EXPECT_EQ( + fromTimestampString("1970-01-12 00:00:00"), + formatter1->parse("3")->timestamp); + + DateTimeFormatterBuilder builder2(11); + builder2.appendYear(4); + builder2.appendLiteral(" "); + builder2.appendMonthOfYear(2); + builder2.appendLiteral(" "); + builder2.appendWeekOfMonth(1); + builder2.appendLiteral(" "); + builder2.appendDayOfWeek1Based(1); + std::shared_ptr formatter2 = + builder2.setType(DateTimeFormatterType::JODA).build(); + EXPECT_EQ( + fromTimestampString("1999-11-29 00:00:00"), + formatter2->parse("1999 12 1 1")->timestamp); 
+ EXPECT_EQ( + fromTimestampString("1999-12-14 00:00:00"), + formatter2->parse("1999 12 3 2")->timestamp); +} } // namespace facebook::velox::functions From e6d51ae2cb91630a5d9cbf63a83721722b49bca9 Mon Sep 17 00:00:00 2001 From: NEUpanning Date: Thu, 25 Jul 2024 16:49:37 +0800 Subject: [PATCH 4/4] add unit tests --- velox/type/tests/TimestampConversionTest.cpp | 29 ++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/velox/type/tests/TimestampConversionTest.cpp b/velox/type/tests/TimestampConversionTest.cpp index 2ea96844790e..3a1468939bb0 100644 --- a/velox/type/tests/TimestampConversionTest.cpp +++ b/velox/type/tests/TimestampConversionTest.cpp @@ -104,6 +104,35 @@ TEST(DateTimeUtilTest, fromDateInvalid) { 1970, 6, 31, "Date out of range: 1970-6-31")); } +TEST(DateTimeUtilTest, fromWeekOfMonthDate) { + auto testDaysSinceEpochFromWeekOfMonthDate = + [](int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) { + int64_t daysSinceEpoch; + auto status = util::daysSinceEpochFromWeekOfMonthDate( + year, month, weekOfMonth, dayOfWeek, daysSinceEpoch); + EXPECT_TRUE(status.ok()); + return daysSinceEpoch; + }; + EXPECT_EQ(4, testDaysSinceEpochFromWeekOfMonthDate(1970, 1, 2, 1)); + EXPECT_EQ(361, testDaysSinceEpochFromWeekOfMonthDate(1971, 1, 1, 1)); + EXPECT_EQ(396, testDaysSinceEpochFromWeekOfMonthDate(1971, 2, 1, 1)); + + EXPECT_EQ(10952, testDaysSinceEpochFromWeekOfMonthDate(2000, 1, 1, 1)); + EXPECT_EQ(19905, testDaysSinceEpochFromWeekOfMonthDate(2024, 7, 1, 1)); + + // Before unix epoch. + EXPECT_EQ(-3, testDaysSinceEpochFromWeekOfMonthDate(1970, 1, 1, 1)); + EXPECT_EQ(-2, testDaysSinceEpochFromWeekOfMonthDate(1970, 1, 1, 2)); + EXPECT_EQ(-31, testDaysSinceEpochFromWeekOfMonthDate(1969, 12, 1, 1)); + EXPECT_EQ(-367, testDaysSinceEpochFromWeekOfMonthDate(1969, 1, 1, 1)); + EXPECT_EQ(-724, testDaysSinceEpochFromWeekOfMonthDate(1968, 1, 2, 1)); + EXPECT_EQ(-719533, testDaysSinceEpochFromWeekOfMonthDate(0, 1, 1, 1)); + + // Negative year - BC. 
+ EXPECT_EQ(-719561, testDaysSinceEpochFromWeekOfMonthDate(-1, 12, 1, 1)); + EXPECT_EQ(-719897, testDaysSinceEpochFromWeekOfMonthDate(-1, 1, 1, 1)); +} + TEST(DateTimeUtilTest, fromDateString) { for (ParseMode mode : {ParseMode::kPrestoCast, ParseMode::kSparkCast}) { EXPECT_EQ(0, parseDate("1970-01-01", mode));