From bd483c9a0bcb64a0b99cad563ef3668e0907d6ff Mon Sep 17 00:00:00 2001 From: Zhichao Zhang Date: Fri, 10 Nov 2023 14:30:58 +0800 Subject: [PATCH] [GLUTEN-3668][CH] Performance regresses seriously after PR 3169 merge when executing convert string to date (#3671) PR #3169 add some functions to convert string to date, which leads to performance regression serious. Will be reverted first and raise another pr to fix the issue later. Test case, 2000W records: before PR #3169 : 3s after PR #3169: 3.6m Related PR: #3169, #3563; --- .../GlutenClickHouseTPCHParquetSuite.scala | 3 +- .../Parser/SerializedPlanParser.cpp | 43 ++----------------- 2 files changed, 6 insertions(+), 40 deletions(-) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala index b8cfc8141443..f62a61a38740 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala @@ -2102,7 +2102,8 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite } } - test("GLUTEN-3135: Bug fix to_date") { + // ISSUE-3668, revert first + ignore("GLUTEN-3135: Bug fix to_date") { val create_table_sql = """ | create table test_tbl_3135(id bigint, data string) using parquet diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp index 5ab6281be4c8..ee86528c2c0c 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp @@ -1654,46 +1654,11 @@ const ActionsDAG::Node * SerializedPlanParser::parseExpression(ActionsDAGPtr act /// FIXME. Now we treet '1900-01-01' as null value. Not realy good. /// Updating `toDate32OrNull` to return null if the string is invalid is not acceptable by /// ClickHouse (https://github.com/ClickHouse/ClickHouse/issues/47120). - - /// isNotNull(toDate32OrNull(date)) - String to_date_function_name = "toDate32OrNull"; - const auto * date_node = toFunctionNode(actions_dag, to_date_function_name, args); - const auto * date_is_not_null_node = toFunctionNode(actions_dag, "isNotNull", {date_node}); - - /// isNotNull(toDate32(parseDateTimeOrNull(substring(trimLeft(date), 1, 10), '%Y-%m-%d')) - const auto * substr_offset_node = add_column(std::make_shared(), 1); - const auto * substr_length_node = add_column(std::make_shared(), 10); - const auto * trim_string_node = toFunctionNode(actions_dag, "trimLeft", {args[0]}); - const auto * substr_node = toFunctionNode(actions_dag, "substring", {trim_string_node, substr_offset_node, substr_length_node}); - const auto * date_format_node = add_column(std::make_shared(), "%Y-%m-%d"); - const auto * utc_time_zone_node = add_column(std::make_shared(), "UTC"); - const auto * parse_date_node = toFunctionNode(actions_dag, "parseDateTimeOrNull", {substr_node, date_format_node, utc_time_zone_node}); - const auto * parse_date_is_not_null_node = toFunctionNode(actions_dag, "isNotNull", {parse_date_node}); - - /// toDate32(parseDateTimeOrNull(substring(trimLeft(date), 1, 10), '%Y-%m-%d')) - const auto * date_node_from_parse = toFunctionNode(actions_dag, "toDate32", {parse_date_node}); - /// const null node - const auto * null_const_node = add_column(makeNullable(std::make_shared()), Field{}); - - /** - * Parse toDate(s) as - * if (isNotNull(toDate32OrNull)) - * toDate32OrNull(s) - * else if (isNotNull(parseDateTimeOrNull(substring(trimLeft(s)), 1, 10), '%Y-%m-%d')) - * toDate32(parseDateTimeOrNull(substring(trimLeft(s)), 1, 10), '%Y-%m-%d')) - * else - * null - */ - const auto * to_date_multi_if_node = toFunctionNode(actions_dag, "multiIf", { - date_is_not_null_node, - date_node, - parse_date_is_not_null_node, - date_node_from_parse, - null_const_node - }); + String function_name = "toDate32OrNull"; + const auto * date_node = toFunctionNode(actions_dag, function_name, args); const auto * zero_date_col_node = add_column(std::make_shared(), "1900-01-01"); - const auto * zero_date_node = toFunctionNode(actions_dag, to_date_function_name, {zero_date_col_node}); - DB::ActionsDAG::NodeRawConstPtrs nullif_args = {to_date_multi_if_node, zero_date_node}; + const auto * zero_date_node = toFunctionNode(actions_dag, function_name, {zero_date_col_node}); + DB::ActionsDAG::NodeRawConstPtrs nullif_args = {date_node, zero_date_node}; function_node = toFunctionNode(actions_dag, "nullIf", nullif_args); } else if (substrait_type.has_binary())