Skip to content

Commit

Permalink
Add helper function to return the number of days since epoch for a we…
Browse files Browse the repository at this point in the history
…ek-of-month date (#10604)

Summary:
This helper function is only used by Spark. To align with Spark's SimpleDateFormat behavior, this function offers two modes: lenient and non-lenient. For non-lenient mode, it returns an error status if the date is invalid. For lenient mode, it accepts a wider range of arguments.

Part of #10511

Pull Request resolved: #10604

Reviewed By: xiaoxmeng

Differential Revision: D62033315

Pulled By: bikramSingh91

fbshipit-source-id: f17e50c07272dc656038b221129aff6882bcf02c
  • Loading branch information
NEUpanning authored and facebook-github-bot committed Sep 4, 2024
1 parent 5f4e7e5 commit fe7fdac
Show file tree
Hide file tree
Showing 3 changed files with 246 additions and 5 deletions.
112 changes: 108 additions & 4 deletions velox/type/TimestampConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <folly/Expected.h>
#include "velox/common/base/CheckedArithmetic.h"
#include "velox/common/base/Exceptions.h"
#include "velox/type/HugeInt.h"
#include "velox/type/tz/TimeZoneMap.h"

namespace facebook::velox::util {
Expand Down Expand Up @@ -144,6 +145,53 @@ bool isValidWeekDate(int32_t weekYear, int32_t weekOfYear, int32_t dayOfWeek) {
return true;
}

/// Checks whether (year, month, weekOfMonth, dayOfWeek) names a day that lies
/// inside the given month. Weeks are counted Monday (1) through Sunday (7);
/// week 1 is the, possibly partial, week that contains the first day of the
/// month and the last week may be partial as well.
/// @param year Year, accepted when in [1, kMaxYear].
/// @param month Month of year, accepted when in [1, 12].
/// @param weekOfMonth 1-based week number within the month.
/// @param dayOfWeek ISO day of week, 1 (Monday) through 7 (Sunday).
/// @return true if the combination is valid, false otherwise.
bool isValidWeekOfMonthDate(
    int32_t year,
    int32_t month,
    int32_t weekOfMonth,
    int32_t dayOfWeek) {
  if (year < 1 || year > kMaxYear) {
    return false;
  }
  if (month < 1 || month > 12) {
    return false;
  }
  // A day number outside the week can never be valid. Without this check,
  // values such as 0 or 8 would be accepted for every week that is neither the
  // first nor a partial last week of the month.
  if (dayOfWeek < 1 || dayOfWeek > 7) {
    return false;
  }

  int64_t daysSinceEpochOfFirstDayOfMonth;
  const Status status =
      daysSinceEpochFromDate(year, month, 1, daysSinceEpochOfFirstDayOfMonth);
  if (!status.ok()) {
    return false;
  }

  // Calculates the actual number of weeks in the month and validates that
  // weekOfMonth is in the valid range.
  const int32_t firstDayOfWeek =
      extractISODayOfTheWeek(daysSinceEpochOfFirstDayOfMonth);
  const int32_t firstWeekLength = 7 - firstDayOfWeek + 1;
  const int32_t monthLength =
      isLeapYear(year) ? kLeapDays[month] : kNormalDays[month];
  // Days left after the first week. This is never negative because a month has
  // at least 28 days and the first week has at most 7.
  const int32_t daysAfterFirstWeek = monthLength - firstWeekLength;
  // Integer ceiling division; avoids a round trip through floating point.
  const int32_t actualWeeks = 1 + (daysAfterFirstWeek + 6) / 7;
  if (weekOfMonth < 1 || weekOfMonth > actualWeeks) {
    return false;
  }

  // Validate day of week.
  // A day that falls before the first day of the month is invalid.
  if (weekOfMonth == 1 && dayOfWeek < firstDayOfWeek) {
    return false;
  }
  // Number of days in the last week; 0 means the last week is a full week.
  const int32_t lastWeekLength = daysAfterFirstWeek % 7;
  // A day that falls after the last day of the month is invalid.
  if (weekOfMonth == actualWeeks && lastWeekLength != 0 &&
      dayOfWeek > lastWeekLength) {
    return false;
  }

  return true;
}

inline bool validDate(int64_t daysSinceEpoch) {
return daysSinceEpoch >= std::numeric_limits<int32_t>::min() &&
daysSinceEpoch <= std::numeric_limits<int32_t>::max();
Expand Down Expand Up @@ -554,7 +602,11 @@ daysSinceEpochFromDate(int32_t year, int32_t month, int32_t day, int64_t& out) {
int64_t daysSinceEpoch = 0;

if (!isValidDate(year, month, day)) {
return Status::UserError("Date out of range: {}-{}-{}", year, month, day);
if (threadSkipErrorDetails()) {
return Status::UserError();
} else {
return Status::UserError("Date out of range: {}-{}-{}", year, month, day);
}
}
while (year < 1970) {
year += kYearInterval;
Expand Down Expand Up @@ -593,6 +645,58 @@ Status daysSinceEpochFromWeekDate(
return Status::OK();
}

/// Computes the number of days since the Unix epoch for a date given as
/// (year, month, weekOfMonth, dayOfWeek).
///
/// In non-lenient mode the arguments are first checked with
/// isValidWeekOfMonthDate() and a UserError is returned when they are invalid.
/// In lenient mode out-of-range months roll over into neighbouring years, and
/// out-of-range weeks and days of week are folded into an offset from the
/// Monday of the week that contains the first day of the month.
///
/// All intermediate arithmetic is done in 64 bits so that extreme lenient
/// arguments cannot trigger signed 32-bit overflow.
///
/// @return Days since epoch, or a UserError status. The error message is
/// omitted when threadSkipErrorDetails() is set.
Expected<int64_t> daysSinceEpochFromWeekOfMonthDate(
    int32_t year,
    int32_t month,
    int32_t weekOfMonth,
    int32_t dayOfWeek,
    bool lenient) {
  if (!lenient &&
      !isValidWeekOfMonthDate(year, month, weekOfMonth, dayOfWeek)) {
    if (threadSkipErrorDetails()) {
      return folly::makeUnexpected(Status::UserError());
    }
    return folly::makeUnexpected(Status::UserError(
        "Date out of range: {}-{}-{}-{}",
        year,
        month,
        weekOfMonth,
        dayOfWeek));
  }

  // Adjusts the year and month to ensure month is within the range 1-12,
  // accounting for overflow or underflow. This is a floor division of the
  // zero-based month by 12: month 13 becomes January of the next year, month 0
  // becomes December of the previous year, month -12 becomes December two
  // years back.
  const int64_t zeroBasedMonth = static_cast<int64_t>(month) - 1;
  int64_t additionYears = zeroBasedMonth / 12;
  int64_t monthIndex = zeroBasedMonth % 12;
  if (monthIndex < 0) {
    monthIndex += 12;
    --additionYears;
  }
  const int64_t adjustedYear = static_cast<int64_t>(year) + additionYears;
  if (adjustedYear < std::numeric_limits<int32_t>::min() ||
      adjustedYear > std::numeric_limits<int32_t>::max()) {
    // The rolled-over year does not fit the int32 parameter of
    // daysSinceEpochFromDate; report it instead of overflowing.
    if (threadSkipErrorDetails()) {
      return folly::makeUnexpected(Status::UserError());
    }
    return folly::makeUnexpected(Status::UserError(
        "Date out of range: {}-{}-{}", adjustedYear, monthIndex + 1, 1));
  }
  year = static_cast<int32_t>(adjustedYear);
  month = static_cast<int32_t>(monthIndex + 1);

  int64_t daysSinceEpochOfFirstDayOfMonth;
  const Status status =
      daysSinceEpochFromDate(year, month, 1, daysSinceEpochOfFirstDayOfMonth);
  if (!status.ok()) {
    return folly::makeUnexpected(status);
  }
  const int32_t firstDayOfWeek =
      extractISODayOfTheWeek(daysSinceEpochOfFirstDayOfMonth);

  // Folds dayOfWeek into an offset in [0, 7]. The offset is later reduced by
  // one and added to the Monday of the week containing the first day of the
  // month.
  // NOTE(review): dayOfWeek 7 (and 8) yields offset 0, i.e. the day *before*
  // that Monday, while isValidWeekOfMonthDate() treats 7 as the last day of a
  // Monday-based week. Presumably intentional to mirror a Sunday-first week in
  // Spark; confirm against the caller.
  const int64_t wideDayOfWeek = dayOfWeek;
  int64_t days;
  if (wideDayOfWeek < 1) {
    // 1 - wideDayOfWeek equals |wideDayOfWeek - 1| here and cannot overflow.
    days = 7 - (1 - wideDayOfWeek) % 7;
  } else if (wideDayOfWeek > 7) {
    days = (wideDayOfWeek - 1) % 7;
  } else {
    days = wideDayOfWeek % 7;
  }

  // Monday of the first week + whole weeks + day offset.
  return daysSinceEpochOfFirstDayOfMonth - (firstDayOfWeek - 1) +
      7 * (static_cast<int64_t>(weekOfMonth) - 1) + days - 1;
}

Status
daysSinceEpochFromDayOfYear(int32_t year, int32_t dayOfYear, int64_t& out) {
if (!isValidDayOfYear(year, dayOfYear)) {
Expand Down Expand Up @@ -643,7 +747,7 @@ Expected<int32_t> fromDateString(const char* str, size_t len, ParseMode mode) {
return daysSinceEpoch;
}

int32_t extractISODayOfTheWeek(int32_t daysSinceEpoch) {
int32_t extractISODayOfTheWeek(int64_t daysSinceEpoch) {
// date of 0 is 1970-01-01, which was a Thursday (4)
// -7 = 4
// -6 = 5
Expand All @@ -662,10 +766,10 @@ int32_t extractISODayOfTheWeek(int32_t daysSinceEpoch) {
// 7 = 4
if (daysSinceEpoch < 0) {
// negative date: start off at 4 and cycle downwards
return (7 - ((-int64_t(daysSinceEpoch) + 3) % 7));
return (7 - ((-int128_t(daysSinceEpoch) + 3) % 7));
} else {
// positive date: start off at 4 and cycle upwards
return ((int64_t(daysSinceEpoch) + 3) % 7) + 1;
return ((int128_t(daysSinceEpoch) + 3) % 7) + 1;
}
}

Expand Down
34 changes: 33 additions & 1 deletion velox/type/TimestampConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,38 @@ Status daysSinceEpochFromWeekDate(
int32_t dayOfWeek,
int64_t& out);

/// Computes the signed number of days since the Unix epoch (1970-01-01) for a
/// date given as year, month, week of month and day of week. To align with
/// Spark's SimpleDateFormat behavior, this function offers two modes: lenient
/// and non-lenient. For non-lenient mode, dates before Jan 1, 1 are not
/// supported, and it returns an error status if the date is invalid. For
/// lenient mode, it accepts a wider range of arguments.
/// @param year Year. For non-lenient mode, it should be in the range [1,
/// 292278994]. e.g: 1996, 2024. For lenient mode, it should be in the range
/// [-292275055, 292278994].
/// @param month Month of year. For non-lenient mode, it should be in the range
/// [1, 12]. For example, 1 is January, 7 is July. For lenient mode, values
/// greater than 12 wrap around to the start of the year, and values less than 1
/// count backward from December. For example, 13 corresponds to January of the
/// following year and -1 corresponds to November of the previous year.
/// @param weekOfMonth Week of the month. For non-lenient mode, it should be in
/// the range [1, depends on month]. For example, 1 is 1st week, 3 is 3rd week.
/// For lenient mode, we consider days of the previous or next months as part of
/// the specified weekOfMonth. For example, if weekOfMonth is 5 but the current
/// month only has 4 weeks (such as February), the first week of March will be
/// considered as the 5th week of February.
/// @param dayOfWeek Day number of week. For example, 1 is Monday, 7 is Sunday.
/// For non-lenient mode, the day must fall inside the month, so the accepted
/// values depend on the month when weekOfMonth is the first or the last week.
/// For lenient mode, we consider days of the previous or next months as part of
/// the specified dayOfWeek. For example, if weekOfMonth is 1 and dayOfWeek is 1
/// but the month's first day is Saturday, the Monday of the last week of the
/// previous month will be used.
/// @param lenient Selects lenient (true) or non-lenient (false) mode.
/// @return The number of days since epoch, or a UserError status if the
/// arguments are rejected.
Expected<int64_t> daysSinceEpochFromWeekOfMonthDate(
int32_t year,
int32_t month,
int32_t weekOfMonth,
int32_t dayOfWeek,
bool lenient);

/// Computes the (signed) number of days since unix epoch (1970-01-01).
/// Returns UserError status if the date is invalid.
Status
Expand All @@ -127,7 +159,7 @@ inline Expected<int32_t> fromDateString(const StringView& str, ParseMode mode) {
}

// Extracts the day of the week from the number of days since epoch
int32_t extractISODayOfTheWeek(int32_t daysSinceEpoch);
int32_t extractISODayOfTheWeek(int64_t daysSinceEpoch);

/// Time conversions.

Expand Down
105 changes: 105 additions & 0 deletions velox/type/tests/TimestampConversionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,111 @@ TEST(DateTimeUtilTest, fromDateInvalid) {
1970, 6, 31, "Date out of range: 1970-6-31"));
}

// Exercises lenient mode: every argument combination below is expected to
// produce a value, except for a year that is out of the supported range.
TEST(DateTimeUtilTest, daysSinceEpochFromWeekOfMonthDateLenient) {
  // Calls the function in lenient mode and returns the computed day number.
  auto daysSinceEpoch =
      [](int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) {
        auto result = util::daysSinceEpochFromWeekOfMonthDate(
            year, month, weekOfMonth, dayOfWeek, true);
        EXPECT_TRUE(!result.hasError());
        return result.value();
      };

  EXPECT_EQ(4, daysSinceEpoch(1970, 1, 2, 1));
  EXPECT_EQ(361, daysSinceEpoch(1971, 1, 1, 1));
  EXPECT_EQ(396, daysSinceEpoch(1971, 2, 1, 1));

  EXPECT_EQ(10952, daysSinceEpoch(2000, 1, 1, 1));
  EXPECT_EQ(19905, daysSinceEpoch(2024, 7, 1, 1));

  // Before unix epoch.
  EXPECT_EQ(-3, daysSinceEpoch(1970, 1, 1, 1));
  EXPECT_EQ(-2, daysSinceEpoch(1970, 1, 1, 2));
  EXPECT_EQ(-31, daysSinceEpoch(1969, 12, 1, 1));
  EXPECT_EQ(-367, daysSinceEpoch(1969, 1, 1, 1));
  EXPECT_EQ(-724, daysSinceEpoch(1968, 1, 2, 1));
  EXPECT_EQ(-719533, daysSinceEpoch(0, 1, 1, 1));

  // Negative year - BC.
  EXPECT_EQ(-719561, daysSinceEpoch(-1, 12, 1, 1));
  EXPECT_EQ(-719897, daysSinceEpoch(-1, 1, 1, 1));

  // Day in the next month: 19783 is 2024-03-01.
  EXPECT_EQ(19783, daysSinceEpoch(2024, 2, 5, 5));
  // Day in the previous month: 19751 is 2024-01-29.
  EXPECT_EQ(19751, daysSinceEpoch(2024, 2, 1, 1));

  // Out of range day of week.
  EXPECT_EQ(338, daysSinceEpoch(1970, 12, 1, 0));
  EXPECT_EQ(337, daysSinceEpoch(1970, 12, 1, -1));
  EXPECT_EQ(337, daysSinceEpoch(1970, 12, 1, -8));

  EXPECT_EQ(332, daysSinceEpoch(1970, 12, 1, 8));
  EXPECT_EQ(333, daysSinceEpoch(1970, 12, 1, 9));
  EXPECT_EQ(336, daysSinceEpoch(1970, 12, 1, 19));

  // Out of range month. The first two cases are in-range baselines.
  EXPECT_EQ(-3, daysSinceEpoch(1970, 1, 1, 1));
  EXPECT_EQ(207, daysSinceEpoch(1970, 8, 1, 1));
  EXPECT_EQ(361, daysSinceEpoch(1970, 13, 1, 1));

  EXPECT_EQ(-31, daysSinceEpoch(1970, 0, 1, 1));
  EXPECT_EQ(-66, daysSinceEpoch(1970, -1, 1, 1));
  EXPECT_EQ(-430, daysSinceEpoch(1970, -13, 1, 1));

  // Out of range year.
  auto result =
      util::daysSinceEpochFromWeekOfMonthDate(292278995, 1, 1, 1, true);
  // Make sure there is an error before inspecting it.
  ASSERT_TRUE(result.hasError());
  EXPECT_EQ(result.error().message(), "Date out of range: 292278995-1-1");
}

// Checks the day-of-week extraction at both ends of the int64 range and for a
// small negative and a small positive day number.
TEST(DateTimeUtilTest, extractISODayOfTheWeek) {
  struct Case {
    int64_t daysSinceEpoch;
    int32_t expectedDayOfWeek;
  };
  const Case cases[] = {
      {std::numeric_limits<int64_t>::max(), 4},
      {std::numeric_limits<int64_t>::min(), 3},
      {-10, 1},
      {10, 7},
  };

  for (const Case& testCase : cases) {
    EXPECT_EQ(
        testCase.expectedDayOfWeek,
        util::extractISODayOfTheWeek(testCase.daysSinceEpoch));
  }
}

// Exercises non-lenient mode: invalid combinations must yield a UserError
// with the expected message, valid ones must yield the expected day number.
TEST(DateTimeUtilTest, daysSinceEpochFromWeekOfMonthDateNonLenient) {
  // Calls the function in non-lenient mode and verifies that it fails with a
  // user error carrying the given message.
  auto daysSinceEpochReturnError = [](int32_t year,
                                      int32_t month,
                                      int32_t weekOfMonth,
                                      int32_t dayOfWeek,
                                      const std::string& error) {
    auto result = util::daysSinceEpochFromWeekOfMonthDate(
        year, month, weekOfMonth, dayOfWeek, false);
    // Stop here if the call unexpectedly succeeded; there is no error to
    // inspect in that case.
    ASSERT_TRUE(result.hasError());
    EXPECT_TRUE(result.error().isUserError());
    EXPECT_EQ(result.error().message(), error);
  };

  EXPECT_NO_THROW(daysSinceEpochReturnError(
      292278995, 1, 1, 1, "Date out of range: 292278995-1-1-1"));
  EXPECT_NO_THROW(daysSinceEpochReturnError(
      2024, 0, 1, 1, "Date out of range: 2024-0-1-1"));
  EXPECT_NO_THROW(daysSinceEpochReturnError(
      2024, 13, 1, 1, "Date out of range: 2024-13-1-1"));
  EXPECT_NO_THROW(daysSinceEpochReturnError(
      2024, 1, 6, 1, "Date out of range: 2024-1-6-1"));
  EXPECT_NO_THROW(daysSinceEpochReturnError(
      2024, 2, 1, 1, "Date out of range: 2024-2-1-1"));
  EXPECT_NO_THROW(daysSinceEpochReturnError(
      2024, 2, 5, 5, "Date out of range: 2024-2-5-5"));

  // Calls the function in non-lenient mode and returns the computed day
  // number.
  auto daysSinceEpochReturnValues =
      [](int32_t year, int32_t month, int32_t weekOfMonth, int32_t dayOfWeek) {
        auto result = util::daysSinceEpochFromWeekOfMonthDate(
            year, month, weekOfMonth, dayOfWeek, false);
        EXPECT_TRUE(!result.hasError());
        return result.value();
      };

  EXPECT_EQ(-724, daysSinceEpochReturnValues(1968, 1, 2, 1));
  EXPECT_EQ(4, daysSinceEpochReturnValues(1970, 1, 2, 1));
  EXPECT_EQ(396, daysSinceEpochReturnValues(1971, 2, 1, 1));
  EXPECT_EQ(19905, daysSinceEpochReturnValues(2024, 7, 1, 1));
}

TEST(DateTimeUtilTest, fromDateString) {
for (ParseMode mode : {ParseMode::kPrestoCast, ParseMode::kSparkCast}) {
EXPECT_EQ(0, parseDate("1970-01-01", mode));
Expand Down

0 comments on commit fe7fdac

Please sign in to comment.