From bf42fd768ece392992bfb2f98b193c3ca5332298 Mon Sep 17 00:00:00 2001 From: KevinyhZou <37431499+KevinyhZou@users.noreply.github.com> Date: Wed, 13 Dec 2023 10:23:09 +0800 Subject: [PATCH] [GLUTEN-3934][CH]Bug fix log function diff (#3935) What changes were proposed in this pull request? Parse log10, log2, ln and log1p through a shared FunctionParserLogBase (logarithm.h) that plans f(x) as `if (x <= c) null else f(x)`, with c = 0 for ln/log2/log10 and c = -1 for log1p, and remove the direct log10/log2/log entries from SCALAR_FUNCTIONS. (Fixes: #3934) How was this patch tested? Unit test: adds "GLUTEN-3934: log10/log2/ln" to GlutenClickHouseTPCHParquetSuite, which runs log10/log2/ln over a column, -1.0, 0 and a column expression (n_regionkey - 100000) with ConstantFolding and NullPropagation excluded, and checks the results via runQueryAndCompare. --- .../GlutenClickHouseTPCHParquetSuite.scala | 11 +++ .../Parser/SerializedPlanParser.h | 3 - .../Parser/scalar_function_parser/ln.cpp | 39 ++++++++++ .../Parser/scalar_function_parser/log10.cpp | 39 ++++++++++ .../Parser/scalar_function_parser/log1p.cpp | 49 ++---------- .../Parser/scalar_function_parser/log2.cpp | 39 ++++++++++ .../Parser/scalar_function_parser/logarithm.h | 77 +++++++++++++++++++ 7 files changed, 211 insertions(+), 46 deletions(-) create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala index 8121a2251731..dc6b6914f01a 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala @@ -2242,5 +2242,16 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite } } + test("GLUTEN-3934: log10/log2/ln") { + withSQLConf( + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> (ConstantFolding.ruleName + "," + NullPropagation.ruleName)) { +
runQueryAndCompare( + "select log10(n_regionkey), log10(-1.0), log10(0), log10(n_regionkey - 100000), " + + "log2(n_regionkey), log2(-1.0), log2(0), log2(n_regionkey - 100000), " + + "ln(n_regionkey), ln(-1.0), ln(0), ln(n_regionkey - 100000) from nation" + )(checkOperatorMatch[ProjectExecTransformer]) + } + } + } // scalastyle:on line.size.limit diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h index 4c2a17a6efec..48e4012ec59f 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h @@ -107,9 +107,6 @@ static const std::map SCALAR_FUNCTIONS {"unhex", "unhex"}, {"hypot", "hypot"}, {"sign", "sign"}, - {"log10", "log10"}, - {"log2", "log2"}, - {"log", "log"}, {"radians", "radians"}, {"greatest", "greatest"}, {"least", "least"}, diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp new file mode 100644 index 000000000000..452311483d8e --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include
+
+namespace local_engine
+{
+/// Parser for the function registered as "log": emits ClickHouse log(x); the base class's parse() guards it so that x <= 0.0 yields NULL.
+class FunctionParserLn : public FunctionParserLogBase
+{
+public:
+    explicit FunctionParserLn(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {}
+    ~FunctionParserLn() override = default;
+    /// NOTE(review): the key is "log", not "ln" -- presumably the plan-side name Spark's ln() arrives under; confirm against the function mappings.
+    static constexpr auto name = "log";
+
+    String getName() const override { return name; }
+    String getCHFunctionName() const override { return "log"; }
+    const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
+    {
+        return addColumnToActionsDAG(actions_dag, data_type, 0.0); // constant c for the base class's `x <= c` check
+    }
+};
+
+static FunctionParserRegister register_ln;
+}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp
new file mode 100644
index 000000000000..191ca1187512
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+
+namespace local_engine
+{
+/// Parser for log10: emits ClickHouse log10(x); the base class's parse() guards it so that x <= 0.0 yields NULL.
+class FunctionParserLog10 : public FunctionParserLogBase
+{
+public:
+    explicit FunctionParserLog10(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {}
+    ~FunctionParserLog10() override = default;
+
+    static constexpr auto name = "log10";
+
+    String getName() const override { return name; }
+    String getCHFunctionName() const override { return "log10"; }
+    const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
+    {
+        return addColumnToActionsDAG(actions_dag, data_type, 0.0); // constant c for the base class's `x <= c` check
+    }
+};
+
+static FunctionParserRegister register_log10;
+}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp
index 57f620a4351d..d669c1eab88a 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp
@@ -14,61 +14,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
*/
-#include
-#include
-#include
-#include
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-}
-}
+#include
 
 namespace local_engine
 {
 
-class FunctionParserLog1p : public FunctionParser
+class FunctionParserLog1p : public FunctionParserLogBase // parse() is now inherited from FunctionParserLogBase
 {
 public:
-    explicit FunctionParserLog1p(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) {}
+    explicit FunctionParserLog1p(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {}
     ~FunctionParserLog1p() override = default;
 
     static constexpr auto name = "log1p";
 
     String getName() const override { return name; }
-
-    const ActionsDAG::Node * parse(
-        const substrait::Expression_ScalarFunction & substrait_func,
-        ActionsDAGPtr & actions_dag) const override
+    String getCHFunctionName() const override { return "log1p"; }
+    const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
     {
-        /*
-            parse log1p(x) as
-            if (x <= -1.0)
-                null
-            else
-                log1p(x)
-        */
-        auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag);
-        if (parsed_args.size() != 1)
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly one arguments", getName());
-
-        const auto * arg_node = parsed_args[0];
-        const auto * log1p_node = toFunctionNode(actions_dag, "log1p", {arg_node});
-
-        auto result_type = log1p_node->result_type;
-        auto nullable_result_type = makeNullable(result_type);
-
-        const auto * null_const_node = addColumnToActionsDAG(actions_dag, nullable_result_type, Field());
-        const auto * nullable_log1p_node = ActionsDAGUtil::convertNodeType(actions_dag, log1p_node, nullable_result_type->getName(), log1p_node->result_name);
-
-        const auto * le_node = toFunctionNode(actions_dag, "lessOrEquals", {arg_node, addColumnToActionsDAG(actions_dag, result_type, -1.0)});
-        const auto * result_node = toFunctionNode(actions_dag, "if", {le_node, null_const_node, nullable_log1p_node});
-
-        return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag);
+        return addColumnToActionsDAG(actions_dag, data_type, -1.0); // keeps the removed code's `x <= -1.0 -> null` rule; the base class applies it
     }
 };
 
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp
new file mode 100644
index 000000000000..463795be9c72
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+
+namespace local_engine
+{
+/// Parser for log2: emits ClickHouse log2(x); the base class's parse() guards it so that x <= 0.0 yields NULL.
+class FunctionParserLog2 : public FunctionParserLogBase
+{
+public:
+    explicit FunctionParserLog2(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {}
+    ~FunctionParserLog2() override = default;
+
+    static constexpr auto name = "log2";
+
+    String getName() const override { return name; }
+    String getCHFunctionName() const override { return "log2"; }
+    const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr & actions_dag, const DataTypePtr & data_type) const override
+    {
+        return addColumnToActionsDAG(actions_dag, data_type, 0.0); // constant c for the base class's `x <= c` check
+    }
+};
+
+static FunctionParserRegister register_log2;
+}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h b/cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h
new file mode 100644
index 000000000000..f46e6b83136e
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int NOT_IMPLEMENTED; +} +} + +namespace local_engine +{ +class FunctionParserLogBase : public FunctionParser +{ +public: + explicit FunctionParserLogBase(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) {} + ~FunctionParserLogBase() override = default; + + virtual DB::String getCHFunctionName() const { return "log"; } + virtual const DB::ActionsDAG::Node * getParameterLowerBound(ActionsDAGPtr &, const DataTypePtr &) const { return nullptr; } + + const ActionsDAG::Node * parse( + const substrait::Expression_ScalarFunction & substrait_func, + ActionsDAGPtr & actions_dag) const override + { + /* + parse log(x) as + if (x <= c) + null + else + log(x) + */ + auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag); + if (parsed_args.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly one arguments", getName()); + + const auto * arg_node = parsed_args[0]; + + const std::string ch_function_name = getCHFunctionName(); + const auto * log_node = toFunctionNode(actions_dag, ch_function_name, {arg_node}); + auto nullable_result_type = makeNullable(log_node->result_type); + + const auto * null_const_node = addColumnToActionsDAG(actions_dag, nullable_result_type, Field()); + const auto * lower_bound_node = getParameterLowerBound(actions_dag, arg_node->result_type); + if (!lower_bound_node) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Vritual function {} may not implement for {}", "getParameterLowerBound", getName()); + + const auto * le_node = toFunctionNode(actions_dag, "lessOrEquals", {arg_node, lower_bound_node}); + const auto * result_node = toFunctionNode(actions_dag, "if", {le_node, null_const_node, log_node}); + + return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag); + } +}; + +}