diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala index 2098ea6248af..32f8d03b7b4b 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala @@ -2206,5 +2206,20 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite spark.sql("drop table test_tbl_3521") } + test("GLUTEN-3948: trunc function") { + withSQLConf( + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> (ConstantFolding.ruleName + "," + NullPropagation.ruleName)) { + runQueryAndCompare( + "select trunc('2023-12-06', 'MM'), trunc('2023-12-06', 'YEAR'), trunc('2023-12-06', 'WEEK'), trunc('2023-12-06', 'QUARTER')", + noFallBack = false + )(checkOperatorMatch[ProjectExecTransformer]) + + runQueryAndCompare( + "select trunc(l_shipdate, 'MM'), trunc(l_shipdate, 'YEAR'), trunc(l_shipdate, 'WEEK'), " + + "trunc(l_shipdate, 'QUARTER') from lineitem" + )(checkOperatorMatch[ProjectExecTransformer]) + } + } + } // scalastyle:on line.size.limit diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp index f70132b4d378..ec6d04e73f23 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp @@ -684,27 +684,6 @@ SerializedPlanParser::getFunctionName(const std::string & function_signature, co else throw Exception(ErrorCodes::BAD_ARGUMENTS, "The first arg of spark extract function is wrong."); } - else if (function_name == "trunc") - { - if (args.size() != 2) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Spark function trunc requires two args, function:{}", function.ShortDebugString()); - - const auto & trunc_field = args.at(0); - if 
(!trunc_field.value().has_literal() || !trunc_field.value().literal().has_string()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second arg of spark trunc function is wrong."); - - const auto & field_value = trunc_field.value().literal().string(); - if (field_value == "YEAR" || field_value == "YYYY" || field_value == "YY") - ch_function_name = "toStartOfYear"; - else if (field_value == "QUARTER") - ch_function_name = "toStartOfQuarter"; - else if (field_value == "MONTH" || field_value == "MM" || field_value == "MON") - ch_function_name = "toStartOfMonth"; - else if (field_value == "WEEK") - ch_function_name = "toStartOfWeek"; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second arg of spark trunc function is wrong, value:{}", field_value); - } else if (function_name == "sha2") { if (args.size() != 2) @@ -1257,9 +1236,8 @@ void SerializedPlanParser::parseFunctionArguments( parsed_args.emplace_back(&mode_node); } } - else if (startsWith(function_signature, "trunc:") || startsWith(function_signature, "sha2:")) + else if (startsWith(function_signature, "sha2:")) { - /// Skip the last arg of trunc in substrait for (int i = 0; i < args.size() - 1; i++) parseFunctionArgument(actions_dag, parsed_args, function_name, args[i]); } diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h index be12b4a65311..4c2a17a6efec 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h @@ -181,7 +181,6 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS {"datediff", "dateDiff"}, {"second", "toSecond"}, {"add_months", "addMonths"}, - {"trunc", ""}, /// dummy mapping {"date_trunc", "dateTrunc"}, {"floor_datetime", "dateTrunc"}, {"months_between", "sparkMonthsBetween"}, diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/trunc.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/trunc.cpp new file mode 100644 index 000000000000..db45bb464a52 --- 
/dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/trunc.cpp @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <Parser/FunctionParser.h> +#include <Common/Exception.h> +#include <DataTypes/DataTypesNumber.h> +#include <Poco/String.h> + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +} + +namespace local_engine +{ + +class FunctionParserTrunc : public FunctionParser +{ +public: + explicit FunctionParserTrunc(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) {} + ~FunctionParserTrunc() override = default; + + static constexpr auto name = "trunc"; + + String getName() const override { return name; } + + const ActionsDAG::Node * parse( + const substrait::Expression_ScalarFunction & substrait_func, + ActionsDAGPtr & actions_dag) const override + { + auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag); + if (parsed_args.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires two arguments", getName()); + + const auto * date_arg = parsed_args[0]; + const auto & fmt_field = substrait_func.arguments().at(1); + if (!fmt_field.value().has_literal() || !fmt_field.value().literal().has_string()) + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported fmt argument, should be a string literal, but: {}", fmt_field.DebugString()); + + const ActionsDAG::Node * result_node = nullptr; + const auto & field_value = Poco::toUpper(fmt_field.value().literal().string()); + if (field_value == "YEAR" || field_value == "YYYY" || field_value == "YY") + result_node = toFunctionNode(actions_dag, "toStartOfYear", {date_arg}); + else if (field_value == "QUARTER") + result_node = toFunctionNode(actions_dag, "toStartOfQuarter", {date_arg}); + else if (field_value == "MONTH" || field_value == "MM" || field_value == "MON") + result_node = toFunctionNode(actions_dag, "toStartOfMonth", {date_arg}); + else if (field_value == "WEEK") + { + const auto * mode_node = addColumnToActionsDAG(actions_dag, std::make_shared<DataTypeUInt8>(), 1); + result_node = toFunctionNode(actions_dag, "toStartOfWeek", {date_arg, mode_node}); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported fmt argument: {}", field_value); + return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag); + } +}; + +static FunctionParserRegister<FunctionParserTrunc> register_trunc; +} diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index a7dbf511eaea..a562da85e283 100644 --- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -270,7 +270,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("groupBy.as") enableSuite[GlutenDateFunctionsSuite] .exclude("function to_date") - .exclude("function trunc") .exclude("from_unixtime") .exclude("unix_timestamp") .exclude("to_unix_timestamp") diff --git a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala 
b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index 7550a58e3941..265b34279e62 100644 --- a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -298,7 +298,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements") enableSuite[GlutenDateFunctionsSuite] .exclude("function to_date") - .exclude("function trunc") .exclude("from_unixtime") .exclude("unix_timestamp") .exclude("to_unix_timestamp") diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index 80337550a9bb..2a26440eba3b 100644 --- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -300,7 +300,6 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements") enableSuite[GlutenDateFunctionsSuite] .exclude("function to_date") - .exclude("function trunc") .exclude("from_unixtime") .exclude("unix_timestamp") .exclude("to_unix_timestamp")