From dedbdafa3270eef677736b27ba5324aa0083b434 Mon Sep 17 00:00:00 2001 From: Lingfeng Zhang Date: Thu, 5 Sep 2024 17:14:31 +0800 Subject: [PATCH 1/3] Support week year --- velox/core/QueryConfig.h | 32 +++ velox/functions/lib/DateTimeFormatter.cpp | 25 ++- velox/functions/lib/DateTimeFormatter.h | 20 ++ velox/functions/lib/TimeUtils.h | 93 ++++++++ velox/functions/lib/tests/CMakeLists.txt | 1 + .../lib/tests/DateTimeFormatterTest.cpp | 20 ++ velox/functions/lib/tests/TimeUtilsTest.cpp | 211 ++++++++++++++++++ .../prestosql/tests/DateTimeFunctionsTest.cpp | 23 +- velox/functions/sparksql/DateTimeFunctions.h | 97 ++++---- .../sparksql/tests/DateTimeFunctionsTest.cpp | 65 ++++++ 10 files changed, 531 insertions(+), 56 deletions(-) create mode 100644 velox/functions/lib/tests/TimeUtilsTest.cpp diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index b39df5397910..3a04c0189b1c 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -333,6 +333,22 @@ class QueryConfig { static constexpr const char* kSparkLegacyDateFormatter = "spark.legacy_date_formatter"; + /// The first day-of-week varies by culture. + /// firstDayOfWeek is a 1-based weekday number starting with Sunday. It + /// determines how week-based calendar works. For example, ISO-8601 uses + /// Monday (2) and the US uses Sunday (1). It should be set to match the + /// 'Calendar.getFirstDayOfWeek()' in Java. Sunday (1) is used by default. + static constexpr const char* kSparkFirstDayOfWeek = + "spark.legacy_date_formatter.first_day_of_week"; + + /// The minimal number of days in the first week by culture. + /// The week that includes January 1st and has 'minimalDaysInFirstWeek' or + /// more days is referred to as week 1. It determines how week-based calendar + /// works. It should be set to match the + /// 'Calendar.getMinimalDaysInFirstWeek()' in Java. 1 day is used by default. 
+ static constexpr const char* kSparkMinimalDaysInFirstWeek = + "spark.legacy_date_formatter.minimal_days_in_first_week"; + /// The number of local parallel table writer operators per task. static constexpr const char* kTaskWriterCount = "task_writer_count"; @@ -742,6 +758,22 @@ class QueryConfig { return get(kSparkLegacyDateFormatter, false); } + uint8_t sparkFirstDayOfWeek() const { + auto value = get(kSparkFirstDayOfWeek, 1); + VELOX_CHECK( + 1 <= value && value <= 7, + "firstDayOfWeek must be a number between 1 and 7"); + return static_cast(value); + } + + uint8_t sparkMinimalDaysInFirstWeek() const { + auto value = get(kSparkMinimalDaysInFirstWeek, 1); + VELOX_CHECK( + 1 <= value && value <= 7, + "minimalDaysInFirstWeek must be a number between 1 and 7"); + return static_cast(value); + } + bool exprTrackCpuUsage() const { return get(kExprTrackCpuUsage, false); } diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp index 0dfd3f652aa1..7268b09f78d5 100644 --- a/velox/functions/lib/DateTimeFormatter.cpp +++ b/velox/functions/lib/DateTimeFormatter.cpp @@ -24,6 +24,7 @@ #include "velox/external/date/iso_week.h" #include "velox/external/date/tz.h" #include "velox/functions/lib/DateTimeFormatterBuilder.h" +#include "velox/functions/lib/TimeUtils.h" #include "velox/type/TimestampConversion.h" #include "velox/type/tz/TimeZoneMap.h" @@ -1175,6 +1176,7 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { size += 2; break; case DateTimeFormatSpecifier::YEAR_OF_ERA: + case DateTimeFormatSpecifier::WEEK_YEAR: // Timestamp is in [-32767-01-01, 32767-12-31] range. size += std::max((int)token.pattern.minRepresentDigits, 6); break; @@ -1245,7 +1247,6 @@ uint32_t DateTimeFormatter::maxResultSize(const tz::TimeZone* timezone) const { } break; // Not supported. 
- case DateTimeFormatSpecifier::WEEK_YEAR: default: VELOX_UNSUPPORTED( "Date format specifier is not supported: {}", @@ -1542,7 +1543,22 @@ int32_t DateTimeFormatter::format( result); break; } - case DateTimeFormatSpecifier::WEEK_YEAR: + case DateTimeFormatSpecifier::WEEK_YEAR: { + auto year = getWeekYear( + static_cast(calDate.year()), + static_cast(calDate.month()), + static_cast(calDate.day()), + firstDayOfWeek_, + minimalDaysInFirstWeek_); + + result += padContent( + static_cast(year), + '0', + token.pattern.minRepresentDigits, + maxResultEnd, + result); + break; + } default: VELOX_UNSUPPORTED( "format is not supported for specifier {}", @@ -2046,14 +2062,11 @@ Expected> buildSimpleDateTimeFormatter( case 'W': builder.appendWeekOfMonth(count); break; - case 'x': - builder.appendWeekYear(count); - break; case 'y': builder.appendYear(count); break; case 'Y': - builder.appendYearOfEra(count); + builder.appendWeekYear(count); break; case 'z': builder.appendTimeZone(count); diff --git a/velox/functions/lib/DateTimeFormatter.h b/velox/functions/lib/DateTimeFormatter.h index 62678f0f0334..c0fbb2fdb4c9 100644 --- a/velox/functions/lib/DateTimeFormatter.h +++ b/velox/functions/lib/DateTimeFormatter.h @@ -215,11 +215,31 @@ class DateTimeFormatter { bool allowOverflow = false, const std::optional& zeroOffsetText = std::nullopt) const; + void setFirstDayOfWeek(uint8_t firstDayOfWeek) { + firstDayOfWeek_ = firstDayOfWeek; + } + + void setMinimalDaysInFirstWeek(uint8_t minimalDaysInFirstWeek) { + minimalDaysInFirstWeek_ = minimalDaysInFirstWeek; + } + private: std::unique_ptr literalBuf_; size_t bufSize_; std::vector tokens_; DateTimeFormatterType type_; + + /// The first day-of-week varies by culture. + /// firstDayOfWeek is a 1-based weekday number starting with Sunday. It + /// determines how week-based calendar works. For example, the ISO-8601 use + /// Monday (2) and the US uses Sunday (1). 
+ uint8_t firstDayOfWeek_ = 2; + + /// The minimal number of days in the first week by culture. + /// The week that includes January 1st and has 'minimalDaysInFirstWeek' or + /// more days is referred to as week 1. It determines how week-based calendar + /// works. For example, ISO-8601 uses 4 days. + uint8_t minimalDaysInFirstWeek_ = 4; }; Expected> buildMysqlDateTimeFormatter( diff --git a/velox/functions/lib/TimeUtils.h b/velox/functions/lib/TimeUtils.h index 2924ced00481..f298d75eca0e 100644 --- a/velox/functions/lib/TimeUtils.h +++ b/velox/functions/lib/TimeUtils.h @@ -20,6 +20,7 @@ #include "velox/external/date/date.h" #include "velox/external/date/iso_week.h" #include "velox/functions/Macros.h" +#include "velox/type/TimestampConversion.h" #include "velox/type/tz/TimeZoneMap.h" namespace facebook::velox::functions { @@ -123,4 +124,96 @@ struct InitSessionTimezone { timeZone_ = getTimeZoneFromConfig(config); } }; + +/// Return day-of-year (DOY) of the first `dayOfWeek` in the year. +/// +/// `dayOfWeek` is a 1-based weekday number starting with Sunday. +/// (1 = Sunday, 2 = Monday, ..., 7 = Saturday). +/// +/// If the `dayOfWeek` is Monday, it returns the DOY of the first Monday in +/// the year. The returned DOY is a number from 1 to 7. +FOLLY_ALWAYS_INLINE +uint32_t getDayOfFirstDayOfWeek(int32_t y, uint32_t dayOfWeek) { + auto firstDay = + date::year_month_day(date::year(y), date::month(1), date::day(1)); + auto weekday = date::weekday(firstDay).c_encoding() + 1; + + int32_t delta = dayOfWeek - weekday; + if (delta < 0) { + delta += 7; + } + + return delta + 1; +} + +/// Return the week year represented by Gregorian calendar for the given year, +/// month and day. +/// +/// getWeekYear only works with Gregorian calendar due to limitations in the +/// date library. As a result, dates before the Gregorian calendar +/// (1582-10-15) yield mismatched results. 
+/// +/// The week that includes January 1st and has 'minimalDaysInFirstWeek' or more +/// days is referred to as week 1. The starting day of the week is decided by +/// the `firstDayOfWeek`, which is a 1-based weekday number starting with +/// Sunday. +/// +/// For ISO 8601, `firstDayOfWeek` is 2 (Monday) and `minimalDaysInFirstWeek` +/// is 4. For legacy Spark, `firstDayOfWeek` is 1 (Sunday) and +/// `minimalDaysInFirstWeek` is 1. +/// +/// The algorithm refers to the getWeekYear algorithm in openjdk: +/// https://github.com/openjdk/jdk/blob/d9c67443f7d7f03efb2837b63ee2acc6113f737f/src/java.base/share/classes/java/util/GregorianCalendar.java#L2058 +FOLLY_ALWAYS_INLINE +int32_t getWeekYear( + int32_t y, + uint32_t m, + uint32_t d, + uint32_t firstDayOfWeek, + uint32_t minimalDaysInFirstWeek) { + auto ymd = date::year_month_day(date::year(y), date::month(m), date::day(d)); + auto firstDayOfTheYear = + date::year_month_day(ymd.year(), date::month(1), date::day(1)); + auto dayOfYear = + (date::sys_days{ymd} - date::sys_days{firstDayOfTheYear}).count() + 1; + auto maxDayOfYear = util::isLeapYear(y) ? 366 : 365; + + // If this week does not cross the years (`7 < dayOfYear && dayOfYear < + // (maxDayOfYear - 6)`), the weekyear must be equal to the year. + // + // If some days of this week fall in the previous year and `minimalDaysInFirstWeek + // < dayOfYear`, the number of days in this week in this year must be greater + // than minimalDaysInFirstWeek, so the weekyear must be equal to the year. + // + // Since minimalDaysInFirstWeek is always no more than 7, these two conditions + // can be reduced to the following code. + if (dayOfYear > minimalDaysInFirstWeek && dayOfYear < (maxDayOfYear - 6)) { + return y; + } + + auto year = y; + // Day of beginning of first complete week of this year. + auto minDayOfYear = getDayOfFirstDayOfWeek(y, firstDayOfWeek); + if (dayOfYear >= minDayOfYear) { + // Day of ending of first week of the next year. 
+ auto minDayOfYear = getDayOfFirstDayOfWeek(y + 1, firstDayOfWeek) - 1; + if (minDayOfYear == 0) { + minDayOfYear = 7; + } + + // If that week belongs to the next weekyear. + if (minDayOfYear >= minimalDaysInFirstWeek) { + // If dayOfYear is in that week. + int days = maxDayOfYear - dayOfYear + 1; + if (days <= (7 - minDayOfYear)) { + ++year; + } + } + } else if (minDayOfYear <= minimalDaysInFirstWeek) { + // The first week of this year has fewer than minimalDaysInFirstWeek days + --year; + } + + return year; +} } // namespace facebook::velox::functions diff --git a/velox/functions/lib/tests/CMakeLists.txt b/velox/functions/lib/tests/CMakeLists.txt index c3486a668729..3fa17a1eafa7 100644 --- a/velox/functions/lib/tests/CMakeLists.txt +++ b/velox/functions/lib/tests/CMakeLists.txt @@ -24,6 +24,7 @@ add_executable( Re2FunctionsTest.cpp RepeatTest.cpp Utf8Test.cpp + TimeUtilsTest.cpp ZetaDistributionTest.cpp) add_test( diff --git a/velox/functions/lib/tests/DateTimeFormatterTest.cpp b/velox/functions/lib/tests/DateTimeFormatterTest.cpp index e81c44728dba..5aabe9c6c569 100644 --- a/velox/functions/lib/tests/DateTimeFormatterTest.cpp +++ b/velox/functions/lib/tests/DateTimeFormatterTest.cpp @@ -1357,6 +1357,26 @@ TEST_F(JodaDateTimeFormatterTest, betterErrorMessaging) { "Value 429 for dayOfMonth must be in the range [1,365] for year 2057 and month 2."); } +TEST_F(JodaDateTimeFormatterTest, formatWeekYear) { + DateTimeFormatterBuilder builder(10); + auto formatter = + builder.appendWeekYear(4).setType(DateTimeFormatterType::JODA).build(); + auto* timezone = tz::locateZone("GMT"); + const auto maxSize = formatter->maxResultSize(timezone); + + auto weekYear = [&](const StringView& time) { + std::string result(maxSize, '\0'); + auto resultSize = formatter->format( + fromTimestampString(time), timezone, maxSize, result.data()); + result.resize(resultSize); + return result; + }; + + EXPECT_EQ(weekYear("2019-12-31 00:00:00"), "2020"); + EXPECT_EQ(weekYear("2020-12-26 
00:00:00"), "2020"); + EXPECT_EQ(weekYear("2021-01-01 00:00:00"), "2020"); +} + class MysqlDateTimeTest : public DateTimeFormatterTest {}; TEST_F(MysqlDateTimeTest, validBuild) { diff --git a/velox/functions/lib/tests/TimeUtilsTest.cpp b/velox/functions/lib/tests/TimeUtilsTest.cpp new file mode 100644 index 000000000000..ec392d63a2f0 --- /dev/null +++ b/velox/functions/lib/tests/TimeUtilsTest.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "velox/functions/lib/TimeUtils.h" +#include + +namespace facebook::velox::functions::test { + +class TimeUtilsTest : public testing::Test {}; + +TEST_F(TimeUtilsTest, getFirstDayOfWeek) { + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 1), 7); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 2), 1); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 3), 2); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 4), 3); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 5), 4); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 6), 5); + EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 7), 6); +} + +class WeekYearTest + : public testing::TestWithParam< + std:: + tuple> { +}; + +TEST_F(TimeUtilsTest, getWeakYear) { + EXPECT_EQ(getWeekYear(2017, 01, 01, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 01, 2, 4), 2016); // 2016W52 + EXPECT_EQ(getWeekYear(2017, 01, 02, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 02, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 03, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 03, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 04, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 04, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 05, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 05, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 06, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 06, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 07, 1, 1), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 01, 07, 2, 4), 2017); // 2017W1 + EXPECT_EQ(getWeekYear(2017, 12, 25, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 25, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 26, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 26, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 27, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 27, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 28, 1, 1), 2017); // 2017W52 + 
EXPECT_EQ(getWeekYear(2017, 12, 28, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 29, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 29, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 30, 1, 1), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 30, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2017, 12, 31, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2017, 12, 31, 2, 4), 2017); // 2017W52 + EXPECT_EQ(getWeekYear(2018, 01, 01, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 01, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 02, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 02, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 03, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 03, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 04, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 04, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 05, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 05, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 06, 1, 1), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 06, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 01, 07, 1, 1), 2018); // 2018W2 + EXPECT_EQ(getWeekYear(2018, 01, 07, 2, 4), 2018); // 2018W1 + EXPECT_EQ(getWeekYear(2018, 12, 25, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 25, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 26, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 26, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 27, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 27, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 28, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 28, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 29, 1, 1), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 29, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 30, 1, 1), 2019); // 2019W1 + 
EXPECT_EQ(getWeekYear(2018, 12, 30, 2, 4), 2018); // 2018W52 + EXPECT_EQ(getWeekYear(2018, 12, 31, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2018, 12, 31, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 01, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 01, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 02, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 02, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 03, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 03, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 04, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 04, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 05, 1, 1), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 05, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 06, 1, 1), 2019); // 2019W2 + EXPECT_EQ(getWeekYear(2019, 01, 06, 2, 4), 2019); // 2019W1 + EXPECT_EQ(getWeekYear(2019, 01, 07, 1, 1), 2019); // 2019W2 + EXPECT_EQ(getWeekYear(2019, 01, 07, 2, 4), 2019); // 2019W2 + EXPECT_EQ(getWeekYear(2019, 12, 25, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 25, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 26, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 26, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 27, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 27, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 28, 1, 1), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 28, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 29, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 29, 2, 4), 2019); // 2019W52 + EXPECT_EQ(getWeekYear(2019, 12, 30, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 30, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 31, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2019, 12, 31, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 01, 1, 1), 2020); // 2020W1 + 
EXPECT_EQ(getWeekYear(2020, 01, 01, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 02, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 02, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 03, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 03, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 04, 1, 1), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 04, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 05, 1, 1), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 05, 2, 4), 2020); // 2020W1 + EXPECT_EQ(getWeekYear(2020, 01, 06, 1, 1), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 06, 2, 4), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 07, 1, 1), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 01, 07, 2, 4), 2020); // 2020W2 + EXPECT_EQ(getWeekYear(2020, 12, 25, 1, 1), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 25, 2, 4), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 26, 1, 1), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 26, 2, 4), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 27, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 27, 2, 4), 2020); // 2020W52 + EXPECT_EQ(getWeekYear(2020, 12, 28, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 28, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2020, 12, 29, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 29, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2020, 12, 30, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 30, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2020, 12, 31, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2020, 12, 31, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 01, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 01, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 02, 1, 1), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 02, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 03, 1, 1), 2021); // 2021W2 + 
EXPECT_EQ(getWeekYear(2021, 01, 03, 2, 4), 2020); // 2020W53 + EXPECT_EQ(getWeekYear(2021, 01, 04, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 04, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 05, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 05, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 06, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 06, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 01, 07, 1, 1), 2021); // 2021W2 + EXPECT_EQ(getWeekYear(2021, 01, 07, 2, 4), 2021); // 2021W1 + EXPECT_EQ(getWeekYear(2021, 12, 25, 1, 1), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 25, 2, 4), 2021); // 2021W51 + EXPECT_EQ(getWeekYear(2021, 12, 26, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 26, 2, 4), 2021); // 2021W51 + EXPECT_EQ(getWeekYear(2021, 12, 27, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 27, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 28, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 28, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 29, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 29, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 30, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 30, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2021, 12, 31, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2021, 12, 31, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2022, 01, 01, 1, 1), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 01, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2022, 01, 02, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 02, 2, 4), 2021); // 2021W52 + EXPECT_EQ(getWeekYear(2022, 01, 03, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 03, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 04, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 04, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 05, 1, 1), 2022); // 2022W2 + 
EXPECT_EQ(getWeekYear(2022, 01, 05, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 06, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 06, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 01, 07, 1, 1), 2022); // 2022W2 + EXPECT_EQ(getWeekYear(2022, 01, 07, 2, 4), 2022); // 2022W1 + EXPECT_EQ(getWeekYear(2022, 12, 25, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 25, 2, 4), 2022); // 2022W51 + EXPECT_EQ(getWeekYear(2022, 12, 26, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 26, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 27, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 27, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 28, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 28, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 29, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 29, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 30, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 30, 2, 4), 2022); // 2022W52 + EXPECT_EQ(getWeekYear(2022, 12, 31, 1, 1), 2022); // 2022W53 + EXPECT_EQ(getWeekYear(2022, 12, 31, 2, 4), 2022); // 2022W52 +} + +} // namespace facebook::velox::functions::test diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp index becf234485fd..b666a7bf2fb2 100644 --- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp @@ -3675,8 +3675,6 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) { parseTimestamp("1970-01-01 00:00:00"), "YYYY-MM-dd HH:mm:ss")); // User format errors or unsupported errors. 
- EXPECT_THROW( - formatDatetime(parseTimestamp("1970-01-01"), "x"), VeloxUserError); EXPECT_THROW( formatDatetime(parseTimestamp("1970-01-01"), "q"), VeloxUserError); EXPECT_THROW( @@ -3760,6 +3758,24 @@ TEST_F(DateTimeFunctionsTest, dateFormat) { dateFormat( parseTimestamp("-2000-02-29 00:00:00.987"), "%Y-%m-%d %H:%i:%s.%f")); + // Week year cases. + EXPECT_EQ("2016", dateFormat(parseTimestamp("2017-01-01"), "%x")); + EXPECT_EQ("2017", dateFormat(parseTimestamp("2017-12-31"), "%x")); + EXPECT_EQ("2018", dateFormat(parseTimestamp("2018-01-01"), "%x")); + EXPECT_EQ("2019", dateFormat(parseTimestamp("2018-12-31"), "%x")); + EXPECT_EQ("2019", dateFormat(parseTimestamp("2019-01-01"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2019-12-30"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2019-12-31"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2020-01-01"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2020-12-31"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2021-01-01"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2021-01-02"), "%x")); + EXPECT_EQ("2020", dateFormat(parseTimestamp("2021-01-03"), "%x")); + EXPECT_EQ("2021", dateFormat(parseTimestamp("2021-12-31"), "%x")); + EXPECT_EQ("2021", dateFormat(parseTimestamp("2022-01-01"), "%x")); + EXPECT_EQ("2021", dateFormat(parseTimestamp("2022-01-02"), "%x")); + EXPECT_EQ("2022", dateFormat(parseTimestamp("2022-12-31"), "%x")); + // Varying digit year cases. 
EXPECT_EQ("06", dateFormat(parseTimestamp("-6-06-20"), "%y")); EXPECT_EQ("-0006", dateFormat(parseTimestamp("-6-06-20"), "%Y")); @@ -4001,9 +4017,6 @@ TEST_F(DateTimeFunctionsTest, dateFormat) { VELOX_ASSERT_THROW( dateFormat(timestamp, "%X"), "Date format specifier is not supported: %X"); - VELOX_ASSERT_THROW( - dateFormat(timestamp, "%x"), - "Date format specifier is not supported: WEEK_YEAR"); } TEST_F(DateTimeFunctionsTest, dateFormatTimestampWithTimezone) { diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index e0d44130b8e1..fa12ba7687fe 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -26,19 +26,31 @@ namespace facebook::velox::functions::sparksql { namespace detail { -Expected> getDateTimeFormatter( - const std::string_view& format, - DateTimeFormatterType type) { - switch (type) { - case DateTimeFormatterType::STRICT_SIMPLE: - return buildSimpleDateTimeFormatter(format, /*lenient=*/false); - case DateTimeFormatterType::LENIENT_SIMPLE: - return buildSimpleDateTimeFormatter(format, /*lenient=*/true); - default: - return buildJodaDateTimeFormatter( - std::string_view(format.data(), format.size())); +struct DateTimeFormatterProvider { + DateTimeFormatterProvider(const core::QueryConfig& config) + : legacyFormatter_(config.sparkLegacyDateFormatter()), + firstDayOfWeek_(config.sparkFirstDayOfWeek()), + minimalDaysInFirstWeek_(config.sparkMinimalDaysInFirstWeek()) {} + + FOLLY_ALWAYS_INLINE auto get(const std::string_view& format) const { + if (legacyFormatter_) { + return buildSimpleDateTimeFormatter(format, /*lenient=*/false) + .then([this](std::shared_ptr f) { + f->setFirstDayOfWeek(firstDayOfWeek_); + f->setMinimalDaysInFirstWeek(minimalDaysInFirstWeek_); + return f; + }); + } + + return buildJodaDateTimeFormatter( + std::string_view(format.data(), format.size())); } -} + + private: + const bool legacyFormatter_; + const uint8_t 
firstDayOfWeek_; + const uint8_t minimalDaysInFirstWeek_; +}; } // namespace detail template @@ -127,10 +139,9 @@ struct UnixTimestampParseFunction { const std::vector& /*inputTypes*/, const core::QueryConfig& config, const arg_type* /*input*/) { - auto formatter = detail::getDateTimeFormatter( - kDefaultFormat_, - config.sparkLegacyDateFormatter() ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); + formatterProvider_ = + std::make_shared(config); + auto formatter = formatterProvider_->get(kDefaultFormat_); VELOX_CHECK(!formatter.hasError(), "Default format should always be valid"); format_ = formatter.value(); setTimezone(config); @@ -165,6 +176,7 @@ struct UnixTimestampParseFunction { // Default if format is not specified, as per Spark documentation. constexpr static std::string_view kDefaultFormat_{"yyyy-MM-dd HH:mm:ss"}; std::shared_ptr format_; + std::shared_ptr formatterProvider_; const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // fallback to GMT. }; @@ -180,12 +192,11 @@ struct UnixTimestampParseWithFormatFunction const core::QueryConfig& config, const arg_type* /*input*/, const arg_type* format) { - legacyFormatter_ = config.sparkLegacyDateFormatter(); + this->formatterProvider_ = + std::make_shared(config); if (format != nullptr) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format->data(), format->size()), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); + auto formatter = this->formatterProvider_->get( + std::string_view(format->data(), format->size())); if (formatter.hasError()) { invalidFormat_ = true; } else { @@ -206,10 +217,9 @@ struct UnixTimestampParseWithFormatFunction // Format error returns null. if (!isConstFormat_) { - auto formatter = detail::getDateTimeFormatter( - std::string_view(format.data(), format.size()), - legacyFormatter_ ? 
DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA); + VELOX_CHECK_NOT_NULL(this->formatterProvider_); + auto formatter = this->formatterProvider_->get( + std::string_view(format.data(), format.size())); if (formatter.hasError()) { return false; } @@ -229,7 +239,6 @@ struct UnixTimestampParseWithFormatFunction private: bool isConstFormat_{false}; bool invalidFormat_{false}; - bool legacyFormatter_{false}; }; // Parses unix time in seconds to a formatted string. @@ -242,7 +251,8 @@ struct FromUnixtimeFunction { const core::QueryConfig& config, const arg_type* /*unixtime*/, const arg_type* format) { - legacyFormatter_ = config.sparkLegacyDateFormatter(); + formatterProvider_ = + std::make_shared(config); sessionTimeZone_ = getTimeZoneFromConfig(config); if (format != nullptr) { setFormatter(*format); @@ -267,13 +277,12 @@ struct FromUnixtimeFunction { private: FOLLY_ALWAYS_INLINE void setFormatter(const arg_type& format) { - formatter_ = detail::getDateTimeFormatter( - std::string_view(format.data(), format.size()), - legacyFormatter_ ? 
DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA) - .thenOrThrow(folly::identity, [&](const Status& status) { - VELOX_USER_FAIL("{}", status.message()); - }); + VELOX_CHECK_NOT_NULL(formatterProvider_); + formatter_ = + formatterProvider_->get(std::string_view(format.data(), format.size())) + .thenOrThrow(folly::identity, [&](const Status& status) { + VELOX_USER_FAIL("{}", status.message()); + }); maxResultSize_ = formatter_->maxResultSize(sessionTimeZone_); } @@ -281,7 +290,7 @@ struct FromUnixtimeFunction { std::shared_ptr formatter_; uint32_t maxResultSize_; bool isConstantTimeFormat_{false}; - bool legacyFormatter_{false}; + std::shared_ptr formatterProvider_; }; template @@ -355,16 +364,15 @@ struct GetTimestampFunction { const core::QueryConfig& config, const arg_type* /*input*/, const arg_type* format) { - legacyFormatter_ = config.sparkLegacyDateFormatter(); + formatterProvider_ = + std::make_shared(config); auto sessionTimezoneName = config.sessionTimezone(); if (!sessionTimezoneName.empty()) { sessionTimeZone_ = tz::locateZone(sessionTimezoneName); } if (format != nullptr) { - formatter_ = detail::getDateTimeFormatter( - std::string_view(*format), - legacyFormatter_ ? DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA) + formatter_ = formatterProvider_ + ->get(std::string_view(format->data(), format->size())) .thenOrThrow(folly::identity, [&](const Status& status) { VELOX_USER_FAIL("{}", status.message()); }); @@ -377,10 +385,9 @@ struct GetTimestampFunction { const arg_type& input, const arg_type& format) { if (!isConstantTimeFormat_) { - formatter_ = detail::getDateTimeFormatter( - std::string_view(format), - legacyFormatter_ ? 
DateTimeFormatterType::STRICT_SIMPLE - : DateTimeFormatterType::JODA) + VELOX_CHECK_NOT_NULL(formatterProvider_); + formatter_ = formatterProvider_ + ->get(std::string_view(format.data(), format.size())) .thenOrThrow(folly::identity, [&](const Status& status) { VELOX_USER_FAIL("{}", status.message()); }); @@ -405,7 +412,7 @@ struct GetTimestampFunction { std::shared_ptr formatter_{nullptr}; bool isConstantTimeFormat_{false}; const tz::TimeZone* sessionTimeZone_{tz::locateZone(0)}; // default to GMT. - bool legacyFormatter_{false}; + std::shared_ptr formatterProvider_; }; template diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index c462758694f5..405734581fb5 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -919,6 +919,71 @@ TEST_F(DateTimeFunctionsTest, fromUnixtime) { fromUnixTime(getUnixTime("2020-06-30 23:59:59"), "yyyy-MM-dd HH:mm:ss"), "2020-07-01 07:59:59"); + // Weekyear cases of ISO-8601 standard. 
+ queryCtx_->testingOverrideConfigUnsafe({ + {core::QueryConfig::kSparkLegacyDateFormatter, "true"}, + {core::QueryConfig::kSparkFirstDayOfWeek, std::to_string(2)}, + {core::QueryConfig::kSparkMinimalDaysInFirstWeek, std::to_string(4)}, + }); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-01-01 00:00:00"), "YYYY"), "2016"); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-12-31 00:00:00"), "YYYY"), "2017"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-01-01 00:00:00"), "YYYY"), "2018"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-12-31 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-01-01 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-30 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-31 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-01-01 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-31 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-01 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-02 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-03 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-31 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-01-01 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-01-02 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-12-31 00:00:00"), "YYYY"), "2022"); + + // Weekyear cases of spark legacy date formatter with default config. 
+ queryCtx_->testingOverrideConfigUnsafe({ + {core::QueryConfig::kSparkLegacyDateFormatter, "true"}, + {core::QueryConfig::kSparkFirstDayOfWeek, std::to_string(1)}, + {core::QueryConfig::kSparkMinimalDaysInFirstWeek, std::to_string(1)}, + }); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-01-01 00:00:00"), "YYYY"), "2017"); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-12-31 00:00:00"), "YYYY"), "2018"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-01-01 00:00:00"), "YYYY"), "2018"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-12-30 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2018-12-31 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-01-01 00:00:00"), "YYYY"), "2019"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-29 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-30 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2019-12-31 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-01-01 00:00:00"), "YYYY"), "2020"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-27 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-28 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-29 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-30 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2020-12-31 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-01-01 00:00:00"), "YYYY"), "2021"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-26 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-27 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-28 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-29 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-30 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2021-12-31 00:00:00"), "YYYY"), "2022"); + 
EXPECT_EQ(fromUnixTime(getUnixTime("2022-01-01 00:00:00"), "YYYY"), "2022"); + EXPECT_EQ(fromUnixTime(getUnixTime("2022-12-31 00:00:00"), "YYYY"), "2022"); + + // Week config should only apply to spark legacy date formatter. + queryCtx_->testingOverrideConfigUnsafe({ + {core::QueryConfig::kSparkLegacyDateFormatter, "false"}, + {core::QueryConfig::kSparkFirstDayOfWeek, std::to_string(1)}, + {core::QueryConfig::kSparkMinimalDaysInFirstWeek, std::to_string(1)}, + }); + EXPECT_EQ(fromUnixTime(getUnixTime("2017-12-31 00:00:00"), "x"), "2017"); + + // Reset config + queryCtx_->testingOverrideConfigUnsafe({}); + // Invalid format. VELOX_ASSERT_THROW( fromUnixTime(0, "yyyy-AA"), "Specifier A is not supported."); From 80b62d472ca1da7bddb044b15175534ba2c9232f Mon Sep 17 00:00:00 2001 From: Lingfeng Zhang Date: Thu, 7 Nov 2024 11:29:03 +0800 Subject: [PATCH 2/3] update --- velox/core/QueryConfig.h | 4 ++-- velox/functions/lib/TimeUtils.h | 11 ++++++----- velox/functions/lib/tests/DateTimeFormatterTest.cpp | 2 +- velox/functions/lib/tests/TimeUtilsTest.cpp | 6 ------ velox/functions/sparksql/DateTimeFunctions.h | 2 +- .../sparksql/tests/DateTimeFunctionsTest.cpp | 2 +- 6 files changed, 11 insertions(+), 16 deletions(-) diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index 3a04c0189b1c..2f3d922e6a87 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -760,7 +760,7 @@ class QueryConfig { uint8_t sparkFirstDayOfWeek() const { auto value = get(kSparkFirstDayOfWeek, 1); - VELOX_CHECK( + VELOX_USER_CHECK( 1 <= value && value <= 7, "firstDayOfWeek must be a number between 1 and 7"); return static_cast(value); @@ -768,7 +768,7 @@ class QueryConfig { uint8_t sparkMinimalDaysInFirstWeek() const { auto value = get(kSparkMinimalDaysInFirstWeek, 1); - VELOX_CHECK( + VELOX_USER_CHECK( 1 <= value && value <= 7, "minimalDaysInFirstWeek must be a number between 1 and 7"); return static_cast(value); diff --git a/velox/functions/lib/TimeUtils.h 
b/velox/functions/lib/TimeUtils.h index f298d75eca0e..6cd6574f18dd 100644 --- a/velox/functions/lib/TimeUtils.h +++ b/velox/functions/lib/TimeUtils.h @@ -171,12 +171,13 @@ int32_t getWeekYear( uint32_t d, uint32_t firstDayOfWeek, uint32_t minimalDaysInFirstWeek) { - auto ymd = date::year_month_day(date::year(y), date::month(m), date::day(d)); - auto firstDayOfTheYear = + const auto ymd = + date::year_month_day(date::year(y), date::month(m), date::day(d)); + const auto firstDayOfTheYear = date::year_month_day(ymd.year(), date::month(1), date::day(1)); - auto dayOfYear = + const auto dayOfYear = (date::sys_days{ymd} - date::sys_days{firstDayOfTheYear}).count() + 1; - auto maxDayOfYear = util::isLeapYear(y) ? 366 : 365; + const auto maxDayOfYear = util::isLeapYear(y) ? 366 : 365; // If this week does not cross the years (`7 < dayOfYear && dayOfYear < // (maxDayOfYear - 6)`), the weekyear must be equal to the year. @@ -192,7 +193,7 @@ int32_t getWeekYear( } auto year = y; - // Day of begining of first complete week of this year. + // Day of beginning of first complete week of this year. auto minDayOfYear = getDayOfFirstDayOfWeek(y, firstDayOfWeek); if (dayOfYear >= minDayOfYear) { // Day of ending of first week of the last year. 
diff --git a/velox/functions/lib/tests/DateTimeFormatterTest.cpp b/velox/functions/lib/tests/DateTimeFormatterTest.cpp index 5aabe9c6c569..38e04bd6ec3b 100644 --- a/velox/functions/lib/tests/DateTimeFormatterTest.cpp +++ b/velox/functions/lib/tests/DateTimeFormatterTest.cpp @@ -1364,7 +1364,7 @@ TEST_F(JodaDateTimeFormatterTest, formatWeekYear) { auto* timezone = tz::locateZone("GMT"); const auto maxSize = formatter->maxResultSize(timezone); - auto weekYear = [&](const StringView& time) { + auto weekYear = [&](const StringView time) { std::string result(maxSize, '\0'); auto resultSize = formatter->format( fromTimestampString(time), timezone, maxSize, result.data()); diff --git a/velox/functions/lib/tests/TimeUtilsTest.cpp b/velox/functions/lib/tests/TimeUtilsTest.cpp index ec392d63a2f0..17ea07ababca 100644 --- a/velox/functions/lib/tests/TimeUtilsTest.cpp +++ b/velox/functions/lib/tests/TimeUtilsTest.cpp @@ -31,12 +31,6 @@ TEST_F(TimeUtilsTest, getFirstDayOfWeek) { EXPECT_EQ(getDayOfFirstDayOfWeek(2024, 7), 6); } -class WeekYearTest - : public testing::TestWithParam< - std:: - tuple> { -}; - TEST_F(TimeUtilsTest, getWeakYear) { EXPECT_EQ(getWeekYear(2017, 01, 01, 1, 1), 2017); // 2017W1 EXPECT_EQ(getWeekYear(2017, 01, 01, 2, 4), 2016); // 2016W52 diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index fa12ba7687fe..4e7d7b763f07 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -32,7 +32,7 @@ struct DateTimeFormatterProvider { firstDayOfWeek_(config.sparkFirstDayOfWeek()), minimalDaysInFirstWeek_(config.sparkMinimalDaysInFirstWeek()) {} - FOLLY_ALWAYS_INLINE auto get(const std::string_view& format) const { + FOLLY_ALWAYS_INLINE auto get(const std::string_view format) const { if (legacyFormatter_) { return buildSimpleDateTimeFormatter(format, /*lenient=*/false) .then([this](std::shared_ptr f) { diff --git 
a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp index 405734581fb5..5e3f8b68204b 100644 --- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp +++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp @@ -981,7 +981,7 @@ TEST_F(DateTimeFunctionsTest, fromUnixtime) { }); EXPECT_EQ(fromUnixTime(getUnixTime("2017-12-31 00:00:00"), "x"), "2017"); - // Reset config + // Reset config. queryCtx_->testingOverrideConfigUnsafe({}); // Invalid format. From f31a6433289a5e0f5997e2977f027bfc1af43b4d Mon Sep 17 00:00:00 2001 From: Lingfeng Zhang Date: Thu, 7 Nov 2024 17:41:46 +0800 Subject: [PATCH 3/3] update --- velox/functions/lib/tests/DateTimeFormatterTest.cpp | 2 +- velox/functions/sparksql/DateTimeFunctions.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/velox/functions/lib/tests/DateTimeFormatterTest.cpp b/velox/functions/lib/tests/DateTimeFormatterTest.cpp index 38e04bd6ec3b..1510c0f6baaa 100644 --- a/velox/functions/lib/tests/DateTimeFormatterTest.cpp +++ b/velox/functions/lib/tests/DateTimeFormatterTest.cpp @@ -1364,7 +1364,7 @@ TEST_F(JodaDateTimeFormatterTest, formatWeekYear) { auto* timezone = tz::locateZone("GMT"); const auto maxSize = formatter->maxResultSize(timezone); - auto weekYear = [&](const StringView time) { + auto weekYear = [&](StringView time) { std::string result(maxSize, '\0'); auto resultSize = formatter->format( fromTimestampString(time), timezone, maxSize, result.data()); diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h index 4e7d7b763f07..61088bb9235f 100644 --- a/velox/functions/sparksql/DateTimeFunctions.h +++ b/velox/functions/sparksql/DateTimeFunctions.h @@ -32,7 +32,7 @@ struct DateTimeFormatterProvider { firstDayOfWeek_(config.sparkFirstDayOfWeek()), minimalDaysInFirstWeek_(config.sparkMinimalDaysInFirstWeek()) {} - FOLLY_ALWAYS_INLINE auto get(const std::string_view format) const { 
+ FOLLY_ALWAYS_INLINE auto get(std::string_view format) const { if (legacyFormatter_) { return buildSimpleDateTimeFormatter(format, /*lenient=*/false) .then([this](std::shared_ptr f) {