From d3cae5625c870e2870a376eb259a87fb94cd979e Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 25 Jul 2024 18:45:50 +0800 Subject: [PATCH 1/7] fix style --- .../scalar_function_parser/CommonScalarFunctionParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp index e4855b507f90..726d1683dbff 100644 --- a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp @@ -64,7 +64,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToUnixTimestamp, to_unix_timestamp, parse REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Position, positive, identity); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Negative, negative, negate); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Pmod, pmod, pmod); -REGISTER_COMMON_SCALAR_FUNCTION_PARSER(abs, abs, abs); +REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Abs, abs, abs); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Ceil, ceil, ceil); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Round, round, roundHalfUp); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Bround, bround, roundBankers); From d632b5cd7a465255a66e04868f2629235b24ae71 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 26 Jul 2024 10:33:29 +0800 Subject: [PATCH 2/7] fix issue https://github.com/apache/incubator-gluten/issues/6561 --- .../scalar_function_parser/arrayHighOrderFunctions.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp index eacd72ed044f..e509f12c9a6c 100644 --- a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp @@ -20,6 +20,7 @@ #include #include #include +#include "DataTypes/DataTypeArray.h" #include #include #include @@ -90,7 +91,14 @@ class ArrayTransform : public FunctionParser assert(parsed_args.size() == 2); if (lambda_args.size() == 1) { - return toFunctionNode(actions_dag, ch_func_name, {parsed_args[1], parsed_args[0]}); + /// Convert Array(T) to Array(U) if needed, Array(T) is the type of the first argument of transform. + /// U is the argument type of lambda function. In some cases Array(T) is not equal to Array(U). + /// e.g. in the second query of https://github.com/apache/incubator-gluten/issues/6561, T is String, and U is Nullable(String) + /// The difference of both types will result in runtime exceptions in function capture. + auto dst_array_type = std::make_shared(lambda_args.front().type); + const auto * dst_array_arg = ActionsDAGUtil::convertNodeType(actions_dag, parsed_args[0], dst_array_type->getName()); + std::cout << "actions_dag:" << actions_dag->dumpDAG() << std::endl; + return toFunctionNode(actions_dag, ch_func_name, {parsed_args[1], dst_array_arg}); } /// transform with index argument. From f678141ea633defe951b410e77d870f286754687 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 26 Jul 2024 15:00:28 +0800 Subject: [PATCH 3/7] add uts --- ...lutenClickHouseNativeWriteTableSuite.scala | 38 +++++-------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala index 99f946cd7dc8..4c993e226895 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala @@ -903,38 +903,20 @@ class GlutenClickHouseNativeWriteTableSuite | ) partitioned by (day string) | stored as $format""".stripMargin - // FIXME: - // Spark analyzer(>=3.4) will resolve map type to - // map_from_arrays(transform(map_keys(map('t1','a','t2','b')), v->v), - // transform(map_values(map('t1','a','t2','b')), v->v)) - // which cause core dump. see https://github.com/apache/incubator-gluten/issues/6561 - // for details. val insert_sql = - if (isSparkVersionLE("3.3")) { - s"""insert overwrite $table_name partition (day) - |select id as a, - | str_to_map(concat('t1:','a','&t2:','b'),'&',':'), - | struct('1', null) as c, - | '2024-01-08' as day - |from range(10)""".stripMargin - } else { - s"""insert overwrite $table_name partition (day) - |select id as a, - | map('t1', 'a', 't2', 'b'), - | struct('1', null) as c, - | '2024-01-08' as day - |from range(10)""".stripMargin - } + s"""insert overwrite $table_name partition (day) + |select id as a, + | map('t1', 'a', 't2', 'b'), + | struct('1', null) as c, + | '2024-01-08' as day + |from range(10)""".stripMargin (table_name, create_sql, insert_sql) }, (table_name, _) => - if (isSparkVersionGE("3.4")) { - // FIXME: Don't Know Why Failed - compareResultsAgainstVanillaSpark( - s"select * from $table_name", - compareResult = true, - _ => {}) - } + compareResultsAgainstVanillaSpark( + s"select * from $table_name", + compareResult = true, + _ => {}) ) } } From 5b4191314755ae381f751cbd0905df024bbe5258 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 26 Jul 2024 15:02:17 +0800 Subject: [PATCH 4/7] add uts --- .../execution/GlutenClickHouseNativeWriteTableSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala index 4c993e226895..7b81924a8882 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala @@ -906,7 +906,7 @@ class GlutenClickHouseNativeWriteTableSuite val insert_sql = s"""insert overwrite $table_name partition (day) |select id as a, - | map('t1', 'a', 't2', 'b'), + | str_to_map(concat('t1:','a','&t2:','b'),'&',':'), | struct('1', null) as c, | '2024-01-08' as day |from range(10)""".stripMargin From 8df6ee447bca721982b87e6b84c6379e999ae7e5 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 26 Jul 2024 15:48:54 +0800 Subject: [PATCH 5/7] fix uts --- cpp-ch/local-engine/Common/CHUtil.cpp | 15 ++++++++++++++- cpp-ch/local-engine/Common/CHUtil.h | 7 +++++++ .../arrayHighOrderFunctions.cpp | 8 +++++--- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp index 1be9c09a8c26..92f3b612ba63 100644 --- a/cpp-ch/local-engine/Common/CHUtil.cpp +++ b/cpp-ch/local-engine/Common/CHUtil.cpp @@ -477,6 +477,19 @@ const DB::ActionsDAG::Node * ActionsDAGUtil::convertNodeType( DB::createInternalCastOverloadResolver(cast_type, std::move(diagnostic)), std::move(children), result_name); } +const DB::ActionsDAG::Node * ActionsDAGUtil::convertNodeTypeIfNeeded( + DB::ActionsDAGPtr & actions_dag, + const DB::ActionsDAG::Node * node, + const DB::DataTypePtr & dst_type, + const std::string & result_name, + CastType cast_type) +{ + if (node->result_type->equals(*dst_type)) + return node; + + return convertNodeType(actions_dag, node, dst_type->getName(), result_name, cast_type); +} + String QueryPipelineUtil::explainPipeline(DB::QueryPipeline & pipeline) { DB::WriteBufferFromOwnString buf; @@ -844,7 +857,7 @@ void BackendInitializerUtil::initContexts(DB::Context::ConfigurationPtr config) size_t index_uncompressed_cache_size = config->getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); double index_uncompressed_cache_size_ratio = config->getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO); global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio); - + String index_mark_cache_policy = config->getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY); size_t index_mark_cache_size = config->getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); double index_mark_cache_size_ratio = config->getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO); diff --git a/cpp-ch/local-engine/Common/CHUtil.h b/cpp-ch/local-engine/Common/CHUtil.h index 98139fb49a5b..ef05fc525795 100644 --- a/cpp-ch/local-engine/Common/CHUtil.h +++ b/cpp-ch/local-engine/Common/CHUtil.h @@ -132,6 +132,13 @@ class ActionsDAGUtil const std::string & type_name, const std::string & result_name = "", DB::CastType cast_type = DB::CastType::nonAccurate); + + static const DB::ActionsDAG::Node * convertNodeTypeIfNeeded( + DB::ActionsDAGPtr & actions_dag, + const DB::ActionsDAG::Node * node, + const DB::DataTypePtr & dst_type, + const std::string & result_name = "", + DB::CastType cast_type = DB::CastType::nonAccurate); }; class QueryPipelineUtil diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp index e509f12c9a6c..5cb5bf34517b 100644 --- a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp @@ -95,9 +95,11 @@ class ArrayTransform : public FunctionParser /// U is the argument type of lambda function. In some cases Array(T) is not equal to Array(U). /// e.g. in the second query of https://github.com/apache/incubator-gluten/issues/6561, T is String, and U is Nullable(String) /// The difference of both types will result in runtime exceptions in function capture. - auto dst_array_type = std::make_shared(lambda_args.front().type); - const auto * dst_array_arg = ActionsDAGUtil::convertNodeType(actions_dag, parsed_args[0], dst_array_type->getName()); - std::cout << "actions_dag:" << actions_dag->dumpDAG() << std::endl; + const auto & src_array_type = parsed_args[0]->result_type; + DataTypePtr dst_array_type = std::make_shared(lambda_args.front().type); + if (isNullableOrLowCardinalityNullable(src_array_type)) + dst_array_type = std::make_shared(dst_array_type); + const auto * dst_array_arg = ActionsDAGUtil::convertNodeTypeIfNeeded(actions_dag, parsed_args[0], dst_array_type); return toFunctionNode(actions_dag, ch_func_name, {parsed_args[1], dst_array_arg}); } From ac8bfe0b42bff88846554fb93ce9f5eed18dd22f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 26 Jul 2024 15:51:51 +0800 Subject: [PATCH 6/7] fix style --- .../arrayHighOrderFunctions.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp index 5cb5bf34517b..f9f093cbad50 100644 --- a/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/arrayHighOrderFunctions.cpp @@ -15,17 +15,17 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include "DataTypes/DataTypeArray.h" +#include +#include #include #include -#include +#include #include #include +#include +#include +#include +#include namespace DB::ErrorCodes { From 3033a133f2b509b07ebb2d1bb90373e20085bf87 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 30 Jul 2024 15:54:25 +0800 Subject: [PATCH 7/7] ignore some checks when spark 3.3 --- .../GlutenClickHouseNativeWriteTableSuite.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala index 7b81924a8882..578c43292747 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseNativeWriteTableSuite.scala @@ -913,10 +913,12 @@ class GlutenClickHouseNativeWriteTableSuite (table_name, create_sql, insert_sql) }, (table_name, _) => - compareResultsAgainstVanillaSpark( - s"select * from $table_name", - compareResult = true, - _ => {}) + if (isSparkVersionGE("3.4")) { + compareResultsAgainstVanillaSpark( + s"select * from $table_name", + compareResult = true, + _ => {}) + } ) } }