Skip to content

Commit

Permalink
[GLUTEN-3948][CH] Fix exception and diff of trunc function (#3968)
Browse files Browse the repository at this point in the history
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)

(Fixes: #3948)

How was this patch tested?
add ut
  • Loading branch information
exmy authored Dec 8, 2023
1 parent 305924a commit d11a61b
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2206,5 +2206,20 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
spark.sql("drop table test_tbl_3521")
}

test("GLUTEN-3948: trunc function") {
withSQLConf(
SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> (ConstantFolding.ruleName + "," + NullPropagation.ruleName)) {
runQueryAndCompare(
"select trunc('2023-12-06', 'MM'), trunc('2023-12-06', 'YEAR'), trunc('2023-12-06', 'WEEK'), trunc('2023-12-06', 'QUARTER')",
noFallBack = false
)(checkOperatorMatch[ProjectExecTransformer])

runQueryAndCompare(
"select trunc(l_shipdate, 'MM'), trunc(l_shipdate, 'YEAR'), trunc(l_shipdate, 'WEEK'), " +
"trunc(l_shipdate, 'QUARTER') from lineitem"
)(checkOperatorMatch[ProjectExecTransformer])
}
}

}
// scalastyle:on line.size.limit
24 changes: 1 addition & 23 deletions cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,27 +684,6 @@ SerializedPlanParser::getFunctionName(const std::string & function_signature, co
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The first arg of spark extract function is wrong.");
}
else if (function_name == "trunc")
{
if (args.size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Spark function trunc requires two args, function:{}", function.ShortDebugString());

const auto & trunc_field = args.at(0);
if (!trunc_field.value().has_literal() || !trunc_field.value().literal().has_string())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second arg of spark trunc function is wrong.");

const auto & field_value = trunc_field.value().literal().string();
if (field_value == "YEAR" || field_value == "YYYY" || field_value == "YY")
ch_function_name = "toStartOfYear";
else if (field_value == "QUARTER")
ch_function_name = "toStartOfQuarter";
else if (field_value == "MONTH" || field_value == "MM" || field_value == "MON")
ch_function_name = "toStartOfMonth";
else if (field_value == "WEEK")
ch_function_name = "toStartOfWeek";
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second arg of spark trunc function is wrong, value:{}", field_value);
}
else if (function_name == "sha2")
{
if (args.size() != 2)
Expand Down Expand Up @@ -1257,9 +1236,8 @@ void SerializedPlanParser::parseFunctionArguments(
parsed_args.emplace_back(&mode_node);
}
}
else if (startsWith(function_signature, "trunc:") || startsWith(function_signature, "sha2:"))
else if (startsWith(function_signature, "sha2:"))
{
/// Skip the last arg of trunc in substrait
for (int i = 0; i < args.size() - 1; i++)
parseFunctionArgument(actions_dag, parsed_args, function_name, args[i]);
}
Expand Down
1 change: 0 additions & 1 deletion cpp-ch/local-engine/Parser/SerializedPlanParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS
{"datediff", "dateDiff"},
{"second", "toSecond"},
{"add_months", "addMonths"},
{"trunc", ""}, /// dummy mapping
{"date_trunc", "dateTrunc"},
{"floor_datetime", "dateTrunc"},
{"months_between", "sparkMonthsBetween"},
Expand Down
79 changes: 79 additions & 0 deletions cpp-ch/local-engine/Parser/scalar_function_parser/trunc.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <Parser/FunctionParser.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/ActionsDAG.h>
#include <Poco/String.h>

namespace DB
{

namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}

}

namespace local_engine
{

class FunctionParserTrunc : public FunctionParser
{
public:
explicit FunctionParserTrunc(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) {}
~FunctionParserTrunc() override = default;

static constexpr auto name = "trunc";

String getName() const override { return name; }

const ActionsDAG::Node * parse(
const substrait::Expression_ScalarFunction & substrait_func,
ActionsDAGPtr & actions_dag) const override
{
auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag);
if (parsed_args.size() != 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires two arguments", getName());

const auto * date_arg = parsed_args[0];
const auto & fmt_field = substrait_func.arguments().at(1);
if (!fmt_field.value().has_literal() || !fmt_field.value().literal().has_string())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported fmt argument, should be a string literal, but: {}", fmt_field.DebugString());

const ActionsDAG::Node * result_node = nullptr;
const auto & field_value = Poco::toUpper(fmt_field.value().literal().string());
if (field_value == "YEAR" || field_value == "YYYY" || field_value == "YY")
result_node = toFunctionNode(actions_dag, "toStartOfYear", {date_arg});
else if (field_value == "QUARTER")
result_node = toFunctionNode(actions_dag, "toStartOfQuarter", {date_arg});
else if (field_value == "MONTH" || field_value == "MM" || field_value == "MON")
result_node = toFunctionNode(actions_dag, "toStartOfMonth", {date_arg});
else if (field_value == "WEEK")
{
const auto * mode_node = addColumnToActionsDAG(actions_dag, std::make_shared<DataTypeUInt8>(), 1);
result_node = toFunctionNode(actions_dag, "toStartOfWeek", {date_arg, mode_node});
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported fmt argument: {}", field_value);
return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag);
}
};

static FunctionParserRegister<FunctionParserTrunc> register_trunc;
}
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("groupBy.as")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
.exclude("function trunc")
.exclude("from_unixtime")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
.exclude("function trunc")
.exclude("from_unixtime")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
.exclude("function trunc")
.exclude("from_unixtime")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
Expand Down

0 comments on commit d11a61b

Please sign in to comment.