Skip to content

Commit

Permalink
[GLUTEN-3668][CH] Performance regresses seriously after PR 3169 merge…
Browse files Browse the repository at this point in the history
… when executing convert string to date (#3671)

PR #3169 add some functions to convert string to date, which leads to performance regression serious.
Will be reverted first and raise another pr to fix the issue later.

Test case, 2000W records:
before PR #3169 : 3s
after PR #3169: 3.6m

Related PR: #3169, #3563;
  • Loading branch information
zzcclp authored Nov 10, 2023
1 parent 6be077d commit bd483c9
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2102,7 +2102,8 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite
}
}

test("GLUTEN-3135: Bug fix to_date") {
// ISSUE-3668, revert first
ignore("GLUTEN-3135: Bug fix to_date") {
val create_table_sql =
"""
| create table test_tbl_3135(id bigint, data string) using parquet
Expand Down
43 changes: 4 additions & 39 deletions cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1654,46 +1654,11 @@ const ActionsDAG::Node * SerializedPlanParser::parseExpression(ActionsDAGPtr act
/// FIXME. Now we treet '1900-01-01' as null value. Not realy good.
/// Updating `toDate32OrNull` to return null if the string is invalid is not acceptable by
/// ClickHouse (https://github.com/ClickHouse/ClickHouse/issues/47120).

/// isNotNull(toDate32OrNull(date))
String to_date_function_name = "toDate32OrNull";
const auto * date_node = toFunctionNode(actions_dag, to_date_function_name, args);
const auto * date_is_not_null_node = toFunctionNode(actions_dag, "isNotNull", {date_node});

/// isNotNull(toDate32(parseDateTimeOrNull(substring(trimLeft(date), 1, 10), '%Y-%m-%d'))
const auto * substr_offset_node = add_column(std::make_shared<DataTypeInt32>(), 1);
const auto * substr_length_node = add_column(std::make_shared<DataTypeInt32>(), 10);
const auto * trim_string_node = toFunctionNode(actions_dag, "trimLeft", {args[0]});
const auto * substr_node = toFunctionNode(actions_dag, "substring", {trim_string_node, substr_offset_node, substr_length_node});
const auto * date_format_node = add_column(std::make_shared<DataTypeString>(), "%Y-%m-%d");
const auto * utc_time_zone_node = add_column(std::make_shared<DataTypeString>(), "UTC");
const auto * parse_date_node = toFunctionNode(actions_dag, "parseDateTimeOrNull", {substr_node, date_format_node, utc_time_zone_node});
const auto * parse_date_is_not_null_node = toFunctionNode(actions_dag, "isNotNull", {parse_date_node});

/// toDate32(parseDateTimeOrNull(substring(trimLeft(date), 1, 10), '%Y-%m-%d'))
const auto * date_node_from_parse = toFunctionNode(actions_dag, "toDate32", {parse_date_node});
/// const null node
const auto * null_const_node = add_column(makeNullable(std::make_shared<DataTypeDate32>()), Field{});

/**
* Parse toDate(s) as
* if (isNotNull(toDate32OrNull))
* toDate32OrNull(s)
* else if (isNotNull(parseDateTimeOrNull(substring(trimLeft(s)), 1, 10), '%Y-%m-%d'))
* toDate32(parseDateTimeOrNull(substring(trimLeft(s)), 1, 10), '%Y-%m-%d'))
* else
* null
*/
const auto * to_date_multi_if_node = toFunctionNode(actions_dag, "multiIf", {
date_is_not_null_node,
date_node,
parse_date_is_not_null_node,
date_node_from_parse,
null_const_node
});
String function_name = "toDate32OrNull";
const auto * date_node = toFunctionNode(actions_dag, function_name, args);
const auto * zero_date_col_node = add_column(std::make_shared<DataTypeString>(), "1900-01-01");
const auto * zero_date_node = toFunctionNode(actions_dag, to_date_function_name, {zero_date_col_node});
DB::ActionsDAG::NodeRawConstPtrs nullif_args = {to_date_multi_if_node, zero_date_node};
const auto * zero_date_node = toFunctionNode(actions_dag, function_name, {zero_date_col_node});
DB::ActionsDAG::NodeRawConstPtrs nullif_args = {date_node, zero_date_node};
function_node = toFunctionNode(actions_dag, "nullIf", nullif_args);
}
else if (substrait_type.has_binary())
Expand Down

0 comments on commit bd483c9

Please sign in to comment.