From 6338e641857d9acfacb53c202531c0c99598ae8a Mon Sep 17 00:00:00 2001 From: xmy Date: Tue, 28 Nov 2023 11:31:05 +0800 Subject: [PATCH] [GLUTEN-3861][CH] Fix parse exception when join postJoinFilter contains singularOrList --- .../GlutenClickHouseTPCHParquetSuite.scala | 21 +++++++++++++++++++ cpp-ch/local-engine/Parser/JoinRelParser.cpp | 2 +- .../Parser/SerializedPlanParser.cpp | 12 ++++++++--- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala index 32f8d03b7b4b..8121a2251731 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetSuite.scala @@ -2186,6 +2186,27 @@ class GlutenClickHouseTPCHParquetSuite extends GlutenClickHouseTPCHAbstractSuite } } + test("GLUTEN-3861: Fix parse exception when join postJoinFilter contains singularOrList") { + withSQLConf(("spark.sql.autoBroadcastJoinThreshold", "-1")) { + val sql = + """ + |select t1.l_orderkey, t1.l_year, t2.o_orderkey, t2.o_year + |from ( + | select l_orderkey, extract(year from l_shipdate) as l_year, count(1) as l_cnt + | from lineitem + | group by l_orderkey, l_shipdate) t1 + |left join ( + | select o_orderkey, extract(year from o_orderdate) as o_year, count(1) as o_cnt + | from orders + | group by o_orderkey, o_orderdate) t2 + |on t1.l_orderkey = t2.o_orderkey + | and l_year in (1997, 1995, 1993) + |order by t1.l_orderkey, t1.l_year, t2.o_orderkey, t2.o_year + |""".stripMargin + compareResultsAgainstVanillaSpark(sql, true, { _ => }) + } + } + test("GLUTEN-3467: Fix 'Names of tuple elements must be unique' error for ch backend") { val sql = """ diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp 
b/cpp-ch/local-engine/Parser/JoinRelParser.cpp index a71c5fd348bc..0e902f00a09a 100644 --- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp +++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp @@ -397,7 +397,7 @@ bool JoinRelParser::tryAddPushDownFilter( } } } - // if ch not support the join type or join conditions, it will throw an exception like 'not support'. + // if ch does not support the join type or join conditions, it will throw an exception like 'not support'. catch (Poco::Exception & e) { // CH not support join condition has 'or' and has different table in each side. diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp index 0986ee43a1c7..49a2a7e96810 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -1883,7 +1884,7 @@ ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_typ size_t(0), name_and_types, std::make_shared<ActionsDAG>(name_and_types), - nullptr /* prepared_sets */, + std::make_shared<PreparedSets>(), false /* no_subqueries */, false /* no_makeset */, false /* only_consts */, @@ -1895,6 +1896,8 @@ ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_typ ASTPtr ASTParser::parseToAST(const Names & names, const substrait::Expression & rel) { LOG_DEBUG(&Poco::Logger::get("ASTParser"), "substrait plan:\n{}", rel.DebugString()); + if (rel.has_singular_or_list()) + return parseArgumentToAST(names, rel); if (!rel.has_scalar_function()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "the root of expression should be a scalar function:\n {}", rel.DebugString()); @@ -2000,7 +2003,8 @@ ASTPtr ASTParser::parseArgumentToAST(const Names & names, const substrait::Expre bool nullable = false; size_t options_len = options.size(); - args.reserve(options_len); + ASTs in_args; + in_args.reserve(options_len); for (int i = 0; i < 
static_cast<int>(options_len); ++i) { @@ -2023,8 +2027,10 @@ ASTPtr ASTParser::parseArgumentToAST(const Names & names, const substrait::Expre elem_type->getName(), option_type->getName()); - args.emplace_back(std::make_shared<ASTLiteral>(type_and_field.second)); + in_args.emplace_back(std::make_shared<ASTLiteral>(type_and_field.second)); } + auto array_ast = makeASTFunction("array", in_args); + args.emplace_back(array_ast); auto ast = makeASTFunction("in", args); if (nullable)