diff --git a/velox/type/TimestampConversion.cpp b/velox/type/TimestampConversion.cpp
index e5c5e3ce5f3d..477fcb585be5 100644
--- a/velox/type/TimestampConversion.cpp
+++ b/velox/type/TimestampConversion.cpp
@@ -44,6 +44,7 @@
 #include
 #include "velox/common/base/CheckedArithmetic.h"
 #include "velox/common/base/Exceptions.h"
+#include "velox/type/HugeInt.h"
 #include "velox/type/tz/TimeZoneMap.h"
 
 namespace facebook::velox::util {
@@ -144,6 +145,63 @@ bool isValidWeekDate(int32_t weekYear, int32_t weekOfYear, int32_t dayOfWeek) {
   return true;
 }
 
+// Strict-mode validator for a (year, month, weekOfMonth, dayOfWeek) tuple.
+// Weeks are Monday-based here (dayOfWeek 1 = Monday ... 7 = Sunday) and the
+// tuple is accepted only if the addressed day lies inside 'month'.
+bool isValidWeekOfMonthDate(
+    int32_t year,
+    int32_t month,
+    int32_t weekOfMonth,
+    int32_t dayOfWeek) {
+  if (year < 1 || year > kMaxYear) {
+    return false;
+  }
+  if (month < 1 || month > 12) {
+    return false;
+  }
+  // A day-of-week number outside 1..7 can never be valid in strict mode; the
+  // first/last-week checks below only compare it against month boundaries.
+  if (dayOfWeek < 1 || dayOfWeek > 7) {
+    return false;
+  }
+
+  int64_t daysSinceEpochOfFirstDayOfMonth;
+  const Status status =
+      daysSinceEpochFromDate(year, month, 1, daysSinceEpochOfFirstDayOfMonth);
+  if (!status.ok()) {
+    return false;
+  }
+
+  // Number of (Monday-based) weeks that 'month' touches: the possibly partial
+  // first week plus however many weeks are needed for the remaining days.
+  const int32_t firstDayOfWeek =
+      extractISODayOfTheWeek(daysSinceEpochOfFirstDayOfMonth);
+  const int32_t firstWeekLength = 7 - firstDayOfWeek + 1;
+  const int32_t monthLength =
+      isLeapYear(year) ? kLeapDays[month] : kNormalDays[month];
+  // Integer ceiling of (monthLength - firstWeekLength) / 7; the difference is
+  // never negative because a month has at least 28 days.
+  const int32_t actualWeeks = 1 + (monthLength - firstWeekLength + 6) / 7;
+  if (weekOfMonth < 1 || weekOfMonth > actualWeeks) {
+    return false;
+  }
+
+  // In the first week, days before the 1st of the month belong to the previous
+  // month and are rejected.
+  if (weekOfMonth == 1 && dayOfWeek < firstDayOfWeek) {
+    return false;
+  }
+  const int32_t lastWeekLength = (monthLength - firstWeekLength) % 7;
+  // In a partial last week, days after the last day of the month belong to the
+  // next month and are rejected.
+  if (weekOfMonth == actualWeeks && lastWeekLength != 0 &&
+      dayOfWeek > lastWeekLength) {
+    return false;
+  }
+
+  return true;
+}
+
 inline bool validDate(int64_t daysSinceEpoch) {
   return daysSinceEpoch >= std::numeric_limits::min() &&
       daysSinceEpoch <= std::numeric_limits::max();
@@ -554,7 +612,11 @@ daysSinceEpochFromDate(int32_t year, int32_t month, int32_t day, int64_t& out) {
   int64_t daysSinceEpoch = 0;
 
   if (!isValidDate(year, month, day)) {
-    return Status::UserError("Date out of range: {}-{}-{}", year, month, day);
+    if (threadSkipErrorDetails()) {
+      return Status::UserError();
+    } else {
+      return Status::UserError("Date out of range: {}-{}-{}", year, month, day);
+    }
   }
   while (year < 1970) {
     year += kYearInterval;
@@ -593,6 +655,65 @@ Status daysSinceEpochFromWeekDate(
   return Status::OK();
 }
 
+Expected<int64_t> daysSinceEpochFromWeekOfMonthDate(
+    int32_t year,
+    int32_t month,
+    int32_t weekOfMonth,
+    int32_t dayOfWeek,
+    bool lenient) {
+  if (!lenient &&
+      !isValidWeekOfMonthDate(year, month, weekOfMonth, dayOfWeek)) {
+    if (threadSkipErrorDetails()) {
+      return folly::makeUnexpected(Status::UserError());
+    } else {
+      return folly::makeUnexpected(Status::UserError(
+          "Date out of range: {}-{}-{}-{}",
+          year,
+          month,
+          weekOfMonth,
+          dayOfWeek));
+    }
+  }
+
+  // Adjusts the year and month to ensure month is within the range 1-12,
+  // accounting for overflow or underflow. For month < 1, 'month % 12' is in
+  // [-11, 0], so the wrap needs no abs() (which is undefined for INT32_MIN).
+  int32_t additionYears = 0;
+  if (month < 1) {
+    additionYears = month / 12 - 1;
+    month = 12 + month % 12;
+  } else if (month > 12) {
+    additionYears = (month - 1) / 12;
+    month = (month - 1) % 12 + 1;
+  }
+  year += additionYears;
+
+  int64_t daysSinceEpochOfFirstDayOfMonth;
+  const Status status =
+      daysSinceEpochFromDate(year, month, 1, daysSinceEpochOfFirstDayOfMonth);
+  if (!status.ok()) {
+    return folly::makeUnexpected(status);
+  }
+  const int32_t firstDayOfWeek =
+      extractISODayOfTheWeek(daysSinceEpochOfFirstDayOfMonth);
+  // Maps dayOfWeek to an offset in [0, 7] counted from the day before the
+  // Monday of the target week. An in-range Sunday (7) maps to 0, i.e. the
+  // Sunday preceding that Monday; out-of-range values wrap modulo 7.
+  int32_t days;
+  if (dayOfWeek < 1) {
+    days = static_cast<int32_t>(7 - (1 - static_cast<int64_t>(dayOfWeek)) % 7);
+  } else if (dayOfWeek > 7) {
+    days = (dayOfWeek - 1) % 7;
+  } else {
+    days = dayOfWeek % 7;
+  }
+  // Monday of the week containing the 1st, plus whole weeks, plus the day
+  // offset. The week term is 64-bit so a large lenient weekOfMonth cannot
+  // overflow int32.
+  return daysSinceEpochOfFirstDayOfMonth - (firstDayOfWeek - 1) +
+      7 * (static_cast<int64_t>(weekOfMonth) - 1) + days - 1;
+}
+
 Status
 daysSinceEpochFromDayOfYear(int32_t year, int32_t dayOfYear, int64_t& out) {
   if (!isValidDayOfYear(year, dayOfYear)) {
@@ -643,7 +764,7 @@ Expected fromDateString(const char* str, size_t len, ParseMode mode) {
   return daysSinceEpoch;
 }
 
-int32_t extractISODayOfTheWeek(int32_t daysSinceEpoch) {
+int32_t extractISODayOfTheWeek(int64_t daysSinceEpoch) {
   // date of 0 is 1970-01-01, which was a Thursday (4)
   // -7 = 4
   // -6 = 5
@@ -662,10 +783,10 @@ int32_t extractISODayOfTheWeek(int32_t daysSinceEpoch) {
   // 7 = 4
   if (daysSinceEpoch < 0) {
     // negative date: start off at 4 and cycle downwards
-    return (7 - ((-int64_t(daysSinceEpoch) + 3) % 7));
+    return 7 - static_cast<int32_t>((3 - daysSinceEpoch % 7) % 7);
   } else {
     // positive date: start off at 4 and cycle upwards
-    return ((int64_t(daysSinceEpoch) + 3) % 7) + 1;
+    return static_cast<int32_t>((daysSinceEpoch % 7 + 3) % 7) + 1;
   }
 }
 
diff --git a/velox/type/TimestampConversion.h b/velox/type/TimestampConversion.h
index 5f5e04cfcfe3..1a621f624391 100644
--- a/velox/type/TimestampConversion.h
+++ b/velox/type/TimestampConversion.h
@@ -107,6 +107,38 @@ Status daysSinceEpochFromWeekDate(
     int32_t dayOfWeek,
     int64_t& out);
 
+/// Computes the signed number of days since the Unix epoch (1970-01-01). To
+/// align with Spark's SimpleDateFormat behavior, this function offers two
+/// modes: lenient and strict. For strict mode, dates before Jan 1, 1
+/// are not supported, and it returns an error status if the date is invalid.
+/// For lenient mode, it accepts a wider range of arguments.
+/// @param year Year. For strict mode, it should be in the range [1,
+/// 292278994]. e.g: 1996, 2024. For lenient mode, it should be in the range
+/// [-292275055, 292278994].
+/// @param month Month of year. For strict mode, it should be in the range
+/// [1, 12]. For example, 1 is January, 7 is July. For lenient mode, values
+/// greater than 12 wrap around to the start of the year, and values less than 1
+/// count backward from December. For example, 13 corresponds to January of the
+/// following year and -1 corresponds to November of the previous year.
+/// @param weekOfMonth Week of the month. For strict mode, it should be in
+/// the range [1, number of weeks the month spans]. For example, 1 is 1st week,
+/// 3 is 3rd week. For lenient mode, we consider days of the previous or next
+/// months as part of the specified weekOfMonth. For example, if weekOfMonth is
+/// 5 but the current month only has 4 weeks (such as February), the first week
+/// of March will be considered as the 5th week of February.
+/// @param dayOfWeek Day number of week: 1 is Monday, 7 is Sunday. For strict
+/// mode, it should be in the range [1, 7] and the day must fall inside the
+/// month. For lenient mode, we consider days of the previous or next months as
+/// part of the specified dayOfWeek. For example, if weekOfMonth is 1 and
+/// dayOfWeek is 1 but the month's first day is Saturday, the Monday of the
+/// last week of the previous month will be used.
+Expected<int64_t> daysSinceEpochFromWeekOfMonthDate(
+    int32_t year,
+    int32_t month,
+    int32_t weekOfMonth,
+    int32_t dayOfWeek,
+    bool lenient);
+
 /// Computes the (signed) number of days since unix epoch (1970-01-01).
 /// Returns UserError status if the date is invalid.
 Status
@@ -123,7 +155,7 @@ inline Expected fromDateString(const StringView& str, ParseMode mode) {
 }
 
 // Extracts the day of the week from the number of days since epoch
-int32_t extractISODayOfTheWeek(int32_t daysSinceEpoch);
+int32_t extractISODayOfTheWeek(int64_t daysSinceEpoch);
 
 /// Time conversions.
 
diff --git a/velox/type/tests/TimestampConversionTest.cpp b/velox/type/tests/TimestampConversionTest.cpp
index 2ea96844790e..77b9d39e20a1 100644
--- a/velox/type/tests/TimestampConversionTest.cpp
+++ b/velox/type/tests/TimestampConversionTest.cpp
@@ -104,6 +104,111 @@ TEST(DateTimeUtilTest, fromDateInvalid) {
       1970, 6, 31, "Date out of range: 1970-6-31"));
 }
 
+TEST(DateTimeUtilTest, daysSinceEpochFromWeekOfMonthDateLenient) {
+  auto daysSinceEpoch =
+      [](int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) {
+        auto result = util::daysSinceEpochFromWeekOfMonthDate(
+            year, month, weekOfMonth, dayOfWeek, true);
+        EXPECT_FALSE(result.hasError());
+        return result.value();
+      };
+
+  EXPECT_EQ(4, daysSinceEpoch(1970, 1, 2, 1));
+  EXPECT_EQ(361, daysSinceEpoch(1971, 1, 1, 1));
+  EXPECT_EQ(396, daysSinceEpoch(1971, 2, 1, 1));
+
+  EXPECT_EQ(10952, daysSinceEpoch(2000, 1, 1, 1));
+  EXPECT_EQ(19905, daysSinceEpoch(2024, 7, 1, 1));
+
+  // Before unix epoch.
+  EXPECT_EQ(-3, daysSinceEpoch(1970, 1, 1, 1));
+  EXPECT_EQ(-2, daysSinceEpoch(1970, 1, 1, 2));
+  EXPECT_EQ(-31, daysSinceEpoch(1969, 12, 1, 1));
+  EXPECT_EQ(-367, daysSinceEpoch(1969, 1, 1, 1));
+  EXPECT_EQ(-724, daysSinceEpoch(1968, 1, 2, 1));
+  EXPECT_EQ(-719533, daysSinceEpoch(0, 1, 1, 1));
+
+  // Negative year - BC.
+  EXPECT_EQ(-719561, daysSinceEpoch(-1, 12, 1, 1));
+  EXPECT_EQ(-719897, daysSinceEpoch(-1, 1, 1, 1));
+
+  // Day in the next month.
+  EXPECT_EQ(19783, daysSinceEpoch(2024, 2, 5, 5));
+  // Day in the previous month.
+  EXPECT_EQ(19751, daysSinceEpoch(2024, 2, 1, 1));
+
+  // Out of range day of week.
+  EXPECT_EQ(338, daysSinceEpoch(1970, 12, 1, 0));
+  EXPECT_EQ(337, daysSinceEpoch(1970, 12, 1, -1));
+  EXPECT_EQ(337, daysSinceEpoch(1970, 12, 1, -8));
+
+  EXPECT_EQ(332, daysSinceEpoch(1970, 12, 1, 8));
+  EXPECT_EQ(333, daysSinceEpoch(1970, 12, 1, 9));
+  EXPECT_EQ(336, daysSinceEpoch(1970, 12, 1, 19));
+
+  // Out of range month (months 1 and 8 are in-range baselines).
+  EXPECT_EQ(-3, daysSinceEpoch(1970, 1, 1, 1));
+  EXPECT_EQ(207, daysSinceEpoch(1970, 8, 1, 1));
+  EXPECT_EQ(361, daysSinceEpoch(1970, 13, 1, 1));
+
+  EXPECT_EQ(-31, daysSinceEpoch(1970, 0, 1, 1));
+  EXPECT_EQ(-66, daysSinceEpoch(1970, -1, 1, 1));
+  EXPECT_EQ(-430, daysSinceEpoch(1970, -13, 1, 1));
+
+  // Out of range year.
+  auto result =
+      util::daysSinceEpochFromWeekOfMonthDate(292278995, 1, 1, 1, true);
+  EXPECT_EQ(result.error().message(), "Date out of range: 292278995-1-1");
+}
+
+TEST(DateTimeUtilTest, extractISODayOfTheWeek) {
+  EXPECT_EQ(
+      4, util::extractISODayOfTheWeek(std::numeric_limits<int64_t>::max()));
+  EXPECT_EQ(
+      3, util::extractISODayOfTheWeek(std::numeric_limits<int64_t>::min()));
+  EXPECT_EQ(1, util::extractISODayOfTheWeek(-10));
+  EXPECT_EQ(7, util::extractISODayOfTheWeek(10));
+}
+
+TEST(DateTimeUtilTest, daysSinceEpochFromWeekOfMonthDateStrict) {
+  auto daysSinceEpochReturnError = [](int32_t year,
+                                      int32_t month,
+                                      int32_t weekOfMonth,
+                                      int32_t dayOfWeek,
+                                      const std::string& error) {
+    auto result = util::daysSinceEpochFromWeekOfMonthDate(
+        year, month, weekOfMonth, dayOfWeek, false);
+    EXPECT_TRUE(result.error().isUserError());
+    EXPECT_EQ(result.error().message(), error);
+  };
+
+  EXPECT_NO_THROW(daysSinceEpochReturnError(
+      292278995, 1, 1, 1, "Date out of range: 292278995-1-1-1"));
+  EXPECT_NO_THROW(daysSinceEpochReturnError(
+      2024, 0, 1, 1, "Date out of range: 2024-0-1-1"));
+  EXPECT_NO_THROW(daysSinceEpochReturnError(
+      2024, 13, 1, 1, "Date out of range: 2024-13-1-1"));
+  EXPECT_NO_THROW(daysSinceEpochReturnError(
+      2024, 1, 6, 1, "Date out of range: 2024-1-6-1"));
+  EXPECT_NO_THROW(daysSinceEpochReturnError(
+      2024, 2, 1, 1, "Date out of range: 2024-2-1-1"));
+  EXPECT_NO_THROW(daysSinceEpochReturnError(
+      2024, 2, 5, 5, "Date out of range: 2024-2-5-5"));
+
+  auto daysSinceEpochReturnValues =
+      [](int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) {
+        auto result = util::daysSinceEpochFromWeekOfMonthDate(
+            year, month, weekOfMonth, dayOfWeek, false);
+        EXPECT_FALSE(result.hasError());
+        return result.value();
+      };
+
+  EXPECT_EQ(-724, daysSinceEpochReturnValues(1968, 1, 2, 1));
+  EXPECT_EQ(4, daysSinceEpochReturnValues(1970, 1, 2, 1));
+  EXPECT_EQ(396, daysSinceEpochReturnValues(1971, 2, 1, 1));
+  EXPECT_EQ(19905, daysSinceEpochReturnValues(2024, 7, 1, 1));
+}
+
 TEST(DateTimeUtilTest, fromDateString) {
   for (ParseMode mode : {ParseMode::kPrestoCast, ParseMode::kSparkCast}) {
     EXPECT_EQ(0, parseDate("1970-01-01", mode));