diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index 630bebdc8fc0..aeaeea27c2e7 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -324,6 +324,22 @@ class QueryConfig { static constexpr const char* kSparkLegacyDateFormatter = "spark.legacy_date_formatter"; + /// The first day-of-week varies by culture. + /// firstDayOfWeek is a 1-based weekday number starting with Sunday. It + /// determines how week-based calendar works. For example, ISO-8601 uses + /// Monday (2) and the US uses Sunday (1). It should be set to match the + /// 'Calendar.getFirstDayOfWeek()' in Java. Sunday (1) is used by default. + static constexpr const char* kSparkFirstDayOfWeek = + "spark.legacy_date_formatter.first_day_of_week"; + + /// The minimal number of days in the first week by culture. + /// The week that includes January 1st and has 'minimalDaysInFirstWeek' or + /// more days is referred to as week 1. It determines how week-based calendar + /// works. It should be set to match the + /// 'Calendar.getMinimalDaysInFirstWeek()' in Java. 1 day is used by default. + static constexpr const char* kSparkMinimalDaysInFirstWeek = + "spark.legacy_date_formatter.minimal_days_in_first_week"; + /// The number of local parallel table writer operators per task. 
static constexpr const char* kTaskWriterCount = "task_writer_count"; @@ -817,6 +833,22 @@ class QueryConfig { return get(kSparkLegacyDateFormatter, false); } + uint8_t sparkFirstDayOfWeek() const { + auto value = get(kSparkFirstDayOfWeek, 1); + VELOX_USER_CHECK( + 1 <= value && value <= 7, + "firstDayOfWeek must be a number between 1 and 7"); + return static_cast(value); + } + + uint8_t sparkMinimalDaysInFirstWeek() const { + auto value = get(kSparkMinimalDaysInFirstWeek, 1); + VELOX_USER_CHECK( + 1 <= value && value <= 7, + "minimalDaysInFirstWeek must be a number between 1 and 7"); + return static_cast(value); + } + bool exprTrackCpuUsage() const { return get(kExprTrackCpuUsage, false); } diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 7f1008d6fb2f..63226c1cd08e 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -24,6 +24,7 @@ #include "velox/external/date/iso_week.h" #include "velox/external/date/tz.h" #include "velox/functions/lib/DateTimeFormatterBuilder.h" +#include "velox/functions/lib/TimeUtils.h" #include "velox/type/TimestampConversion.h" #include "velox/type/tz/TimeZoneMap.h" @@ -1173,6 +1174,7 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { size += 2; break; case DateTimeFormatSpecifier::YEAR_OF_ERA: + case DateTimeFormatSpecifier::WEEK_YEAR: // Timestamp is in [-32767-01-01, 32767-12-31] range. size += std::max((int)token.pattern.minRepresentDigits, 6); break; @@ -1243,7 +1245,6 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { } break; // Not supported. 
- case DateTimeFormatSpecifier::WEEK_YEAR: default: VELOX_UNSUPPORTED( "Date format specifier is not supported: {}", @@ -1540,7 +1541,22 @@ int32_t DateTimeFormatter::format( result); break; } - case DateTimeFormatSpecifier::WEEK_YEAR: + case DateTimeFormatSpecifier::WEEK_YEAR: { + auto year = getWeekYear( + static_cast(calDate.year()), + static_cast(calDate.month()), + static_cast(calDate.day()), + firstDayOfWeek_, + minimalDaysInFirstWeek_); + + result += padContent( + static_cast(year), + '0', + token.pattern.minRepresentDigits, + maxResultEnd, + result); + break; + } default: VELOX_UNSUPPORTED( "format is not supported for specifier {}", @@ -2045,14 +2061,11 @@ Expected> buildSimpleDateTimeFormatter( case 'W': builder.appendWeekOfMonth(count); break; - case 'x': - builder.appendWeekYear(count); - break; case 'y': builder.appendYear(count); break; case 'Y': - builder.appendYearOfEra(count); + builder.appendWeekYear(count); break; case 'z': builder.appendTimeZone(count); diff --git a/velox/functions/lib/DateTimeFormatter.h b/velox/functions/lib/DateTimeFormatter.h index 62678f0f0334..c0fbb2fdb4c9 100644 --- a/velox/functions/lib/DateTimeFormatter.h +++ b/velox/functions/lib/DateTimeFormatter.h @@ -215,11 +215,31 @@ class DateTimeFormatter { bool allowOverflow = false, const std::optional& zeroOffsetText = std::nullopt) const; + void setFirstDayOfWeek(uint8_t firstDayOfWeek) { + firstDayOfWeek_ = firstDayOfWeek; + } + + void setMinimalDaysInFirstWeek(uint8_t minimalDaysInFirstWeek) { + minimalDaysInFirstWeek_ = minimalDaysInFirstWeek; + } + private: std::unique_ptr literalBuf_; size_t bufSize_; std::vector tokens_; DateTimeFormatterType type_; + + /// The first day-of-week varies by culture. + /// firstDayOfWeek is a 1-based weekday number starting with Sunday. It + /// determines how week-based calendar works. For example, the ISO-8601 use + /// Monday (2) and the US uses Sunday (1). 
+ uint8_t firstDayOfWeek_ = 2; + + /// The minimal number of days in the first week by culture. + /// The week that includes January 1st and has 'minimalDaysInFirstWeek' or + /// more days is referred to as week 1. It determines how week-based calendar + /// works. For example, ISO-8601 uses 4 days. + uint8_t minimalDaysInFirstWeek_ = 4; }; Expected> buildMysqlDateTimeFormatter( diff --git a/velox/functions/lib/TimeUtils.h b/velox/functions/lib/TimeUtils.h index 2924ced00481..6cd6574f18dd 100644 --- a/velox/functions/lib/TimeUtils.h +++ b/velox/functions/lib/TimeUtils.h @@ -20,6 +20,7 @@ #include "velox/external/date/date.h" #include "velox/external/date/iso_week.h" #include "velox/functions/Macros.h" +#include "velox/type/TimestampConversion.h" #include "velox/type/tz/TimeZoneMap.h" namespace facebook::velox::functions { @@ -123,4 +124,97 @@ struct InitSessionTimezone { timeZone_ = getTimeZoneFromConfig(config); } }; + +/// Return day-of-year (DOY) of the first `dayOfWeek` in the year. +/// +/// `dayOfWeek` is a 1-based weekday number starting with Sunday. +/// (1 = Sunday, 2 = Monday, ..., 7 = Saturday). +/// +/// If the `dayOfWeek` is Monday, it returns the DOY of the first Monday in +/// the year. The returned DOY is a number from 1 to 7. +FOLLY_ALWAYS_INLINE +uint32_t getDayOfFirstDayOfWeek(int32_t y, uint32_t dayOfWeek) { + auto firstDay = + date::year_month_day(date::year(y), date::month(1), date::day(1)); + auto weekday = date::weekday(firstDay).c_encoding() + 1; + + int32_t delta = dayOfWeek - weekday; + if (delta < 0) { + delta += 7; + } + + return delta + 1; +} + +/// Return the week year represented by Gregorian calendar for the given year, +/// month and day. +/// +/// getWeekYear only works with Gregorian calendar due to limitations in the +/// date library. As a result, dates before the Gregorian calendar +/// (1582-10-15) yield mismatched results. 
+/// +/// The week that includes January 1st and has 'minimalDaysInFirstWeek' or more +/// days is referred to as week 1. The starting day of the week is decided by +/// the `firstDayOfWeek`, which is a 1-based weekday number starting with +/// Sunday. +/// +/// For ISO 8601, `firstDayOfWeek` is 2 (Monday) and `minimalDaysInFirstWeek` +/// is 4. For legacy Spark, `firstDayOfWeek` is 1 (Sunday) and +/// `minimalDaysInFirstWeek` is 1. +/// +/// The algorithm refers to the getWeekYear algorithm in openjdk: +/// https://github.com/openjdk/jdk/blob/d9c67443f7d7f03efb2837b63ee2acc6113f737f/src/java.base/share/classes/java/util/GregorianCalendar.java#L2058 +FOLLY_ALWAYS_INLINE +int32_t getWeekYear( + int32_t y, + uint32_t m, + uint32_t d, + uint32_t firstDayOfWeek, + uint32_t minimalDaysInFirstWeek) { + const auto ymd = + date::year_month_day(date::year(y), date::month(m), date::day(d)); + const auto firstDayOfTheYear = + date::year_month_day(ymd.year(), date::month(1), date::day(1)); + const auto dayOfYear = + (date::sys_days{ymd} - date::sys_days{firstDayOfTheYear}).count() + 1; + const auto maxDayOfYear = util::isLeapYear(y) ? 366 : 365; + + // If this week does not cross the years (`7 < dayOfYear && dayOfYear < + // (maxDayOfYear - 6)`), the weekyear must be equal to the year. + // + // If some days of this week fall in the last year and `minimalDaysInFirstWeek + // < dayOfYear`, the number of days in this week in this year must be greater + // than minimalDaysInFirstWeek, so the weekyear must be equal to the year. + // + // Since minimalDaysInFirstWeek is never more than 7, these two conditions + // can be reduced to the following code. + if (dayOfYear > minimalDaysInFirstWeek && dayOfYear < (maxDayOfYear - 6)) { + return y; + } + + auto year = y; + // Day of beginning of first complete week of this year. + auto minDayOfYear = getDayOfFirstDayOfWeek(y, firstDayOfWeek); + if (dayOfYear >= minDayOfYear) { + // Days of the week containing next January 1st that fall in next year. 
+ auto minDayOfYear = getDayOfFirstDayOfWeek(y + 1, firstDayOfWeek) - 1; + if (minDayOfYear == 0) { + minDayOfYear = 7; + } + + // If that week belongs to the next weekyear. + if (minDayOfYear >= minimalDaysInFirstWeek) { + // If dayOfYear is in that week. + int days = maxDayOfYear - dayOfYear + 1; + if (days <= (7 - minDayOfYear)) { + ++year; + } + } + } else if (minDayOfYear <= minimalDaysInFirstWeek) { + // Days of the first week in this year less than minimalDaysInFirstWeek + --year; + } + + return year; +} } // namespace facebook::velox::functions diff --git a/velox/functions/lib/tests/CMakeLists.txt b/velox/functions/lib/tests/CMakeLists.txt index 5715c0d4e890..fd6225fff2d3 100644 --- a/velox/functions/lib/tests/CMakeLists.txt +++ b/velox/functions/lib/tests/CMakeLists.txt @@ -25,6 +25,7 @@ add_executable( RepeatTest.cpp TDigestTest.cpp Utf8Test.cpp + TimeUtilsTest.cpp ZetaDistributionTest.cpp) add_test( diff --git a/velox/functions/lib/tests/DateTimeFormatterTest.cpp b/velox/functions/lib/tests/DateTimeFormatterTest.cpp index ae67088206ec..0bd355811c26 100644 --- a/velox/functions/lib/tests/DateTimeFormatterTest.cpp +++ b/velox/functions/lib/tests/DateTimeFormatterTest.cpp @@ -1357,6 +1357,26 @@ TEST_F(JodaDateTimeFormatterTest, betterErrorMessaging) { "Value 429 for dayOfMonth must be in the range [1,365] for year 2057 and month 2."); } +TEST_F(JodaDateTimeFormatterTest, formatWeekYear) { + DateTimeFormatterBuilder builder(10); + auto formatter = + builder.appendWeekYear(4).setType(DateTimeFormatterType::JODA).build(); + auto* timezone = tz::locateZone("GMT"); + const auto maxSize = formatter->maxResultSize(timezone); + + auto weekYear = [&](StringView time) { + std::string result(maxSize, '\0'); + auto resultSize = formatter->format( + fromTimestampString(time), timezone, maxSize, result.data()); + result.resize(resultSize); + return result; + }; + + EXPECT_EQ(weekYear("2019-12-31 00:00:00"), "2020"); + EXPECT_EQ(weekYear("2020-12-26 00:00:00"), "2020"); + 
EXPECT_EQ(weekYear("2021-01-01 00:00:00"), "2020"); +} + class MysqlDateTimeTest : public DateTimeFormatterTest {}; TEST_F(MysqlDateTimeTest, validBuild) { diff --git a/velox/functions/lib/tests/TimeUtilsTest.cpp b/velox/functions/lib/tests/TimeUtilsTest.cpp new file mode 100644 index 000000000000..17ea07ababca --- /dev/null +++ b/velox/functions/lib/tests/TimeUtilsTest.cpp @@ -0,0 +1,205 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/functions/lib/TimeUtils.h" +#include + +namespace facebook::velox::functions::test { + +class TimeUtilsTest : public testing::Test {}; + +TEST_F(TimeUtilsTest, getFirstDayOfWeek) { + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 1), 7); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 2), 1); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 3), 2); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 4), 3); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 5), 4); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 6), 5); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 7), 6); +} + +TEST_F(TimeUtilsTest, getWeakYear) { + EXPECT_EQ(getWeekYear(2017, 01, 01, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 01, 2, 4), 2016); // 2016W52 + EXPECT_EQ(getWeekYear(2017, 01, 02, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 02, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 03, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 03, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 04, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 04, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 05, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 05, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 06, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 06, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 07, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 07, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 12, 25, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 25, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 26, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 26, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 27, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 27, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 28, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 28, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 
29, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 29, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 30, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 30, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 31, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2017, 12, 31, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2018, 01, 01, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 01, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 02, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 02, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 03, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 03, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 04, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 04, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 05, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 05, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 06, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 06, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 07, 1, 1), 2018); // 2018W2 + EXPECT_EQ(getWeekYear(2018, 01, 07, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 12, 25, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 25, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 26, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 26, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 27, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 27, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 28, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 28, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 29, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 29, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 30, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2018, 12, 30, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 
31, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2018, 12, 31, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 01, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 01, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 02, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 02, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 03, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 03, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 04, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 04, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 05, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 05, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 06, 1, 1), 2019); // 2019W2 + EXPECT_EQ(getWeekYear(2019, 01, 06, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 07, 1, 1), 2019); // 2019W2 + EXPECT_EQ(getWeekYear(2019, 01, 07, 2, 4), 2019); // 2019W2 + EXPECT_EQ(getWeekYear(2019, 12, 25, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 25, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 26, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 26, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 27, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 27, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 28, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 28, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 29, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 29, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 30, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 30, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 31, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 31, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 01, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 01, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 02, 1, 
1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 02, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 03, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 03, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 04, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 04, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 05, 1, 1), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 05, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 06, 1, 1), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 06, 2, 4), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 07, 1, 1), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 07, 2, 4), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 12, 25, 1, 1), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 25, 2, 4), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 26, 1, 1), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 26, 2, 4), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 27, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 27, 2, 4), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 28, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 28, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2020, 12, 29, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 29, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2020, 12, 30, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 30, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2020, 12, 31, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 31, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 01, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 01, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 02, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 02, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 03, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 03, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 04, 1, 1), 
2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 04, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 05, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 05, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 06, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 06, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 07, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 07, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 12, 25, 1, 1), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 25, 2, 4), 2021); // 2021W51 + EXPECT_EQ(getWeekYear(2021, 12, 26, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 26, 2, 4), 2021); // 2021W51 + EXPECT_EQ(getWeekYear(2021, 12, 27, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 27, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 28, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 28, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 29, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 29, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 30, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 30, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 31, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 31, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2022, 01, 01, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 01, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2022, 01, 02, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 02, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2022, 01, 03, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 03, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 04, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 04, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 05, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 05, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 06, 1, 1), 
2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 06, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 07, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 07, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 12, 25, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 25, 2, 4), 2022); // 2022W51 + EXPECT_EQ(getWeekYear(2022, 12, 26, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 26, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 27, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 27, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 28, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 28, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 29, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 29, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 30, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 30, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 31, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 31, 2, 4), 2022); // 2022W52 +} + +} // namespace facebook::velox::functions::test diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index 5ffc17c6810e..37fbb1118833 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3796,8 +3796,6 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { parseTimestamp("1970-01-01 00:00:00"), "YYYY-MM-dd HH:mm:ss")); // User format errors or unsupported errors. - EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "x"), VeloxUserError); EXPECT_THROW( formatDatetime(parseTimestamp("1970-01-01"), "q"), VeloxUserError); EXPECT_THROW( @@ -3881,6 +3879,24 @@ TEST_F(DateTimeFunctionsTest, dateFormat) { dateFormat( parseTimestamp("-2000-02-29 00:00:00.987"), "%Y-%m-%d %H:%i:%s.%f")); + // Week year cases. 
+ EXPECT_EQ("2016", dateFormat(parseTimestamp("2017-01-01"), "%x")); + EXPECT_EQ("2017", dateFormat(parseTimestamp("2017-12-31"), "%x")); + EXPECT_EQ("2018", dateFormat(parseTimestamp("2018-01-01"), "%x")); + EXPECT_EQ("2019", dateFormat(parseTimestamp("2018-12-31"), "%x")); + EXPECT_EQ("2019", dateFormat(parseTimestamp("2019-01-01"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2019-12-30"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2019-12-31"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2020-01-01"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2020-12-31"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2021-01-01"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2021-01-02"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2021-01-03"), "%x")); + EXPECT_EQ("2021", dateFormat(parseTimestamp("2021-12-31"), "%x")); + EXPECT_EQ("2021", dateFormat(parseTimestamp("2022-01-01"), "%x")); + EXPECT_EQ("2021", dateFormat(parseTimestamp("2022-01-02"), "%x")); + EXPECT_EQ("2022", dateFormat(parseTimestamp("2022-12-31"), "%x")); + // Varying digit year cases. 
EXPECT_EQ("06", dateFormat(parseTimestamp("-6-06-20"), "%y")); EXPECT_EQ("-0006", dateFormat(parseTimestamp("-6-06-20"), "%Y")); @@ -4122,9 +4138,6 @@ TEST_F(DateTimeFunctionsTest, dateFormat) { VELOX_ASSERT_THROW( dateFormat(timestamp, "%X"), "Date format specifier is not supported: %X"); - VELOX_ASSERT_THROW( - dateFormat(timestamp, "%x"), - "Date format specifier is not supported: WEEK_YEAR"); } TEST_F(DateTimeFunctionsTest, dateFormatTimestampWithTimezone) { diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 4fa631701d5c..2e5fce4be908 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -26,45 +26,54 @@ namespace facebook::velox::functions::sparksql { namespace detail { -Expected> getDateTimeFormatter( - const std::string_view& format, - DateTimeFormatterType type) { - switch (type) { - case DateTimeFormatterType::STRICT_SIMPLE: - return buildSimpleDateTimeFormatter(format, /*lenient=*/false); - case DateTimeFormatterType::LENIENT_SIMPLE: - return buildSimpleDateTimeFormatter(format, /*lenient=*/true); - default: - return buildJodaDateTimeFormatter( +struct DateTimeFormatterProvider { + DateTimeFormatterProvider(const core::QueryConfig& config) + : legacyFormatter_(config.sparkLegacyDateFormatter()), + firstDayOfWeek_(config.sparkFirstDayOfWeek()), + minimalDaysInFirstWeek_(config.sparkMinimalDaysInFirstWeek()) {} + + // Creates datetime formatter based on the provided format string. When legacy + // formatter is used, returns nullptr for invalid format; otherwise, throws a + // user error. + // + // @param format The format string to be used for initializing the formatter. + // @param throwOnError Set false to always return nullptr when the format + // is incorrect. + // @return A shared pointer to a DateTimeFormatter. If the formatter + // initialization fails and the legacy formatter is used, nullptr is returned. 
+ FOLLY_ALWAYS_INLINE std::shared_ptr get( + std::string_view format, + bool throwOnError = true) const { + Expected> formatter; + if (legacyFormatter_) { + formatter = buildSimpleDateTimeFormatter(format, /*lenient=*/false) + .then([this](std::shared_ptr f) { + f->setFirstDayOfWeek(firstDayOfWeek_); + f->setMinimalDaysInFirstWeek(minimalDaysInFirstWeek_); + return f; + }); + } else { + formatter = buildJodaDateTimeFormatter( std::string_view(format.data(), format.size())); - } -} - -// Creates datetime formatter based on the provided format string. When legacy -// formatter is used, returns nullptr for invalid format; otherwise, throws a -// user error. -// -// @param format The format string to be used for initializing the formatter. -// @param legacyFormatter Whether legacy formatter is used. -// @return A shared pointer to a DateTimeFormatter. If the formatter -// initialization fails and the legacy formatter is used, nullptr is returned. -inline std::shared_ptr initializeFormatter( - const std::string_view format, - bool legacyFormatter) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format), - legacyFormatter ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - // When legacy formatter is used, returns nullptr for invalid format; - // otherwise, throws a user error. 
- if (formatter.hasError()) { - if (legacyFormatter) { - return nullptr; } - VELOX_USER_FAIL(formatter.error().message()); + + // When legacy formatter is used, returns nullptr for invalid format + if (formatter.hasError()) { + if (legacyFormatter_ || !throwOnError) { + return nullptr; + } + + VELOX_USER_FAIL(formatter.error().message()); + } + + return formatter.value(); } - return formatter.value(); -} + + private: + const bool legacyFormatter_; + const uint8_t firstDayOfWeek_; + const uint8_t minimalDaysInFirstWeek_; +}; } // namespace detail template @@ -153,12 +162,11 @@ struct UnixTimestampParseFunction { const std::vector& /*inputTypes*/, const core::QueryConfig& config, const arg_type* /*input*/) { - auto formatter = detail::getDateTimeFormatter( - kDefaultFormat_, - config.sparkLegacyDateFormatter() ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - VELOX_CHECK(!formatter.hasError(), "Default format should always be valid"); - format_ = formatter.value(); + formatterProvider_ = + std::make_shared(config); + auto formatter = formatterProvider_->get(kDefaultFormat_, false); + VELOX_CHECK(formatter, "Default format should always be valid"); + format_ = formatter; setTimezone(config); } @@ -191,6 +199,7 @@ struct UnixTimestampParseFunction { // Default if format is not specified, as per Spark documentation. constexpr static std::string_view kDefaultFormat_{"yyyy-MM-dd HH:mm:ss"}; std::shared_ptr format_; + std::shared_ptr formatterProvider_; const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // fallback to GMT. }; @@ -206,16 +215,15 @@ struct UnixTimestampParseWithFormatFunction const core::QueryConfig& config, const arg_type* /*input*/, const arg_type* format) { - legacyFormatter_ = config.sparkLegacyDateFormatter(); + this->formatterProvider_ = + std::make_shared(config); if (format != nullptr) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format->data(), format->size()), - legacyFormatter_ ? 
DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - if (formatter.hasError()) { + auto formatter = this->formatterProvider_->get( + std::string_view(format->data(), format->size()), false); + if (!formatter) { invalidFormat_ = true; } else { - this->format_ = formatter.value(); + this->format_ = formatter; } isConstFormat_ = true; } @@ -232,14 +240,13 @@ struct UnixTimestampParseWithFormatFunction // Format error returns null. if (!isConstFormat_) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format.data(), format.size()), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); - if (formatter.hasError()) { + VELOX_CHECK_NOT_NULL(this->formatterProvider_); + auto formatter = this->formatterProvider_->get( + std::string_view(format.data(), format.size()), false); + if (!formatter) { return false; } - this->format_ = formatter.value(); + this->format_ = formatter; } auto dateTimeResult = this->format_->parse(std::string_view(input.data(), input.size())); @@ -255,7 +262,6 @@ struct UnixTimestampParseWithFormatFunction private: bool isConstFormat_{false}; bool invalidFormat_{false}; - bool legacyFormatter_{false}; }; // Parses unix time in seconds to a formatted string. 
@@ -268,11 +274,11 @@ struct FromUnixtimeFunction { const core::QueryConfig& config, const arg_type* /*unixtime*/, const arg_type* format) { - legacyFormatter_ = config.sparkLegacyDateFormatter(); + formatterProvider_ = + std::make_shared(config); sessionTimeZone_ = getTimeZoneFromConfig(config); if (format != nullptr) { - auto formatter = detail::initializeFormatter( - std::string_view(*format), legacyFormatter_); + auto formatter = formatterProvider_->get(std::string_view(*format)); if (formatter) { formatter_ = formatter; maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); @@ -291,8 +297,7 @@ struct FromUnixtimeFunction { return false; } if (!isConstantTimeFormat_) { - auto formatter = detail::initializeFormatter( - std::string_view(format), legacyFormatter_); + auto formatter = formatterProvider_->get(std::string_view(format)); if (formatter) { formatter_ = formatter; maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); @@ -314,8 +319,8 @@ struct FromUnixtimeFunction { std::shared_ptr formatter_; uint32_t maxResultSize_; bool isConstantTimeFormat_{false}; - bool legacyFormatter_{false}; bool invalidFormat_{false}; + std::shared_ptr formatterProvider_; }; template @@ -389,14 +394,14 @@ struct GetTimestampFunction { const core::QueryConfig& config, const arg_type* /*input*/, const arg_type* format) { - legacyFormatter_ = config.sparkLegacyDateFormatter(); + formatterProvider_ = + std::make_shared(config); auto sessionTimezoneName = config.sessionTimezone(); if (!sessionTimezoneName.empty()) { sessionTimeZone_ = tz::locateZone(sessionTimezoneName); } if (format != nullptr) { - auto formatter = detail::initializeFormatter( - std::string_view(*format), legacyFormatter_); + auto formatter = formatterProvider_->get(std::string_view(*format)); if (formatter) { formatter_ = formatter; } else { @@ -413,8 +418,7 @@ struct GetTimestampFunction { return false; } if (!isConstantTimeFormat_) { - auto formatter = detail::initializeFormatter( - 
std::string_view(format), legacyFormatter_); + auto formatter = formatterProvider_->get(std::string_view(format)); if (formatter) { formatter_ = formatter; } else { @@ -438,10 +442,10 @@ struct GetTimestampFunction { return result.timezone ? result.timezone : sessionTimeZone_; } + std::shared_ptr formatterProvider_; std::shared_ptr formatter_{nullptr}; bool isConstantTimeFormat_{false}; const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // default to GMT. - bool legacyFormatter_{false}; bool invalidFormat_{false}; }; diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index acdb7dad4018..34f2c5074078 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -936,6 +936,71 @@ TEST_F(DateTimeFunctionsTest, fromUnixtime) { fromUnixTime(getUnixTime("2020-06-30 23:59:59"), "yyyy-MM-dd HH:mm:ss"), "2020-07-01 07:59:59"); + // Weekyear cases of ISO-8601 standard. 
+ queryCtx_->testingOverrideConfigUnsafe({ + {core::QueryConfig::kSparkLegacyDateFormatter, "true"}, + {core::QueryConfig::kSparkFirstDayOfWeek, std::to_string(2)}, + {core::QueryConfig::kSparkMinimalDaysInFirstWeek, std::to_string(4)}, + }); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-01-01 00:00:00"), "YYYY"), "2016"); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-12-31 00:00:00"), "YYYY"), "2017"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-01-01 00:00:00"), "YYYY"), "2018"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-12-31 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-01-01 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-30 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-31 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-01-01 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-31 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-01 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-02 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-03 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-31 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-01-01 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-01-02 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-12-31 00:00:00"), "YYYY"), "2022"); + + // Weekyear cases of spark legacy date formatter with default config. 
+ queryCtx_->testingOverrideConfigUnsafe({ + {core::QueryConfig::kSparkLegacyDateFormatter, "true"}, + {core::QueryConfig::kSparkFirstDayOfWeek, std::to_string(1)}, + {core::QueryConfig::kSparkMinimalDaysInFirstWeek, std::to_string(1)}, + }); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-01-01 00:00:00"), "YYYY"), "2017"); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-12-31 00:00:00"), "YYYY"), "2018"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-01-01 00:00:00"), "YYYY"), "2018"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-12-30 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-12-31 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-01-01 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-29 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-30 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-31 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-01-01 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-27 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-28 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-29 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-30 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-31 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-01 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-26 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-27 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-28 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-29 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-30 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-31 00:00:00"), "YYYY"), "2022"); + 
EXPECT_EQ(fromUnixTime(getUnixTime("2022-01-01 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-12-31 00:00:00"), "YYYY"), "2022"); + + // Week config should only apply to spark legacy date formatter. + queryCtx_->testingOverrideConfigUnsafe({ + {core::QueryConfig::kSparkLegacyDateFormatter, "false"}, + {core::QueryConfig::kSparkFirstDayOfWeek, std::to_string(1)}, + {core::QueryConfig::kSparkMinimalDaysInFirstWeek, std::to_string(1)}, + }); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-12-31 00:00:00"), "x"), "2017"); + + // Reset config. + queryCtx_->testingOverrideConfigUnsafe({}); + // Invalid format. VELOX_ASSERT_THROW( fromUnixTime(0, "yyyy-AA"), "Specifier A is not supported.");