From 13eaebd94a6accc13641c67a9704ea8b85041c5e Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 5 Dec 2023 20:07:29 +0800 Subject: [PATCH] Bug fix log function diff --- .../GlutenClickHouseTPCHParquetSuite.scala | 11 +++ .../Parser/SerializedPlanParser.h | 3 - .../Parser/scalar_function_parser/ln.cpp | 35 +++++++++ .../Parser/scalar_function_parser/log10.cpp | 35 +++++++++ .../Parser/scalar_function_parser/log1p.cpp | 49 +----------- .../Parser/scalar_function_parser/log2.cpp | 35 +++++++++ .../Parser/scalar_function_parser/logarithm.h | 74 +++++++++++++++++++ 7 files changed, 194 insertions(+), 48 deletions(-) create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp create mode 100644 cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala index 8baf5711c23d1..0329685e7721b 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala @@ -2217,5 +2217,16 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite spark.sql("drop table test_tbl_3521") } + test("GLUTEN-3934: log10/log2/ln") { + withSQLConf( + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> (ConstantFolding.ruleName + "," + NullPropagation.ruleName)) { + runQueryAndCompare( + "select log10(n_regionkey), log10(-1.0), log10(0), log10(n_regionkey - 100000), " + + "log2(n_regionkey), log2(-1.0), log2(0), log2(n_regionkey - 100000), " + + "ln(n_regionkey), ln(-1.0), ln(0), ln(n_regionkey - 100000) from nation" + )(checkOperatorMatch[ProjectExecTransformer]) + } + } + } // scalastyle:on line.size.limit diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h index be12b4a653119..f2d7c36f67de2 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h @@ -107,9 +107,6 @@ static const std::map SCALAR_FUNCTIONS {"unhex", "unhex"}, {"hypot", "hypot"}, {"sign", "sign"}, - {"log10", "log10"}, - {"log2", "log2"}, - {"log", "log"}, {"radians", "radians"}, {"greatest", "greatest"}, {"least", "least"}, diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp new file mode 100644 index 0000000000000..423adb92d01b7 --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/ln.cpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace local_engine +{ + +class FunctionParserLn : public FunctionParserLogBase +{ +public: + explicit FunctionParserLn(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {} + ~FunctionParserLn() override = default; + + static constexpr auto name = "log"; + + String getName() const override { return name; } + DB::Float64 getParameterLowerBoundValue() const override { return 0.0; } +}; + +static FunctionParserRegister register_ln; +} diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp new file mode 100644 index 0000000000000..fade79ace8957 --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/log10.cpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace local_engine +{ + +class FunctionParserLog10 : public FunctionParserLogBase +{ +public: + explicit FunctionParserLog10(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {} + ~FunctionParserLog10() override = default; + + static constexpr auto name = "log10"; + + String getName() const override { return name; } + DB::Float64 getParameterLowerBoundValue() const override { return 0.0; } +}; + +static FunctionParserRegister register_log10; +} diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp index 57f620a4351df..8a877ef5e7a7d 100644 --- a/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/log1p.cpp @@ -14,62 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} -} +#include namespace local_engine { -class FunctionParserLog1p : public FunctionParser +class FunctionParserLog1p : public FunctionParserLogBase { public: - explicit FunctionParserLog1p(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) {} + explicit FunctionParserLog1p(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {} ~FunctionParserLog1p() override = default; static constexpr auto name = "log1p"; String getName() const override { return name; } - - const ActionsDAG::Node * parse( - const substrait::Expression_ScalarFunction & substrait_func, - ActionsDAGPtr & actions_dag) const override - { - /* - parse log1p(x) as - if (x <= -1.0) - null - else - log1p(x) - */ - auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag); - if (parsed_args.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly one arguments", getName()); - - const auto * arg_node = parsed_args[0]; - const auto * log1p_node = toFunctionNode(actions_dag, "log1p", {arg_node}); - - auto result_type = log1p_node->result_type; - auto nullable_result_type = makeNullable(result_type); - - const auto * null_const_node = addColumnToActionsDAG(actions_dag, nullable_result_type, Field()); - const auto * nullable_log1p_node = ActionsDAGUtil::convertNodeType(actions_dag, log1p_node, nullable_result_type->getName(), log1p_node->result_name); - - const auto * le_node = toFunctionNode(actions_dag, "lessOrEquals", {arg_node, addColumnToActionsDAG(actions_dag, result_type, -1.0)}); - const auto * result_node = toFunctionNode(actions_dag, "if", {le_node, null_const_node, nullable_log1p_node}); - - return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag); - } + DB::Float64 getParameterLowerBoundValue() const override { return -1.0; } }; static FunctionParserRegister register_log1p; diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp new file mode 100644 index 0000000000000..5f71f22cc62ff --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/log2.cpp @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace local_engine +{ + +class FunctionParserLog2 : public FunctionParserLogBase +{ +public: + explicit FunctionParserLog2(SerializedPlanParser * plan_parser_) : FunctionParserLogBase(plan_parser_) {} + ~FunctionParserLog2() override = default; + + static constexpr auto name = "log2"; + + String getName() const override { return name; } + DB::Float64 getParameterLowerBoundValue() const override { return 0.0; } +}; + +static FunctionParserRegister register_log2; +} diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h b/cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h new file mode 100644 index 0000000000000..1acd22d2c4316 --- /dev/null +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/logarithm.h @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} +} + +namespace local_engine +{ +class FunctionParserLogBase : public FunctionParser +{ +public: + explicit FunctionParserLogBase(SerializedPlanParser * plan_parser_) : FunctionParser(plan_parser_) {} + ~FunctionParserLogBase() override = default; + + virtual DB::Float64 getParameterLowerBoundValue() const { return 0.0; } + + const ActionsDAG::Node * parse( + const substrait::Expression_ScalarFunction & substrait_func, + ActionsDAGPtr & actions_dag) const override + { + /* + parse log(x) as + if (x <= c) + null + else + log(c) + */ + auto parsed_args = parseFunctionArguments(substrait_func, "", actions_dag); + if (parsed_args.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires exactly one arguments", getName()); + + const auto * arg_node = parsed_args[0]; + const std::string function_name = getName(); + const auto * log_node = toFunctionNode(actions_dag, function_name, {arg_node}); + + auto result_type = log_node->result_type; + auto nullable_result_type = makeNullable(result_type); + + const auto * null_const_node = addColumnToActionsDAG(actions_dag, nullable_result_type, Field()); + const auto * nullable_log_node = ActionsDAGUtil::convertNodeType(actions_dag, log_node, nullable_result_type->getName(), log_node->result_name); + const DB::Float64 lowerBound = getParameterLowerBoundValue(); + const auto * le_node = toFunctionNode(actions_dag, "lessOrEquals", {arg_node, addColumnToActionsDAG(actions_dag, result_type, lowerBound)}); + const auto * result_node = toFunctionNode(actions_dag, "if", {le_node, null_const_node, nullable_log_node}); + + return convertNodeTypeIfNeeded(substrait_func, result_node, actions_dag); + } +}; + +}