diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index f82acdc415b2..60767fa7c527 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -3350,5 +3350,20 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
     compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
     spark.sql("drop table test_tbl_7759")
   }
+
+  test("GLUTEN-8253: Fix cast failed when in-filter with tuple values") {
+    spark.sql("drop table if exists test_filter")
+    spark.sql("create table test_filter(c1 string, c2 string) using parquet")
+    // Rows with a null in either column are inserted alongside the fully non-null rows.
+    spark.sql(s"""
+                 |insert into test_filter values
+                 |('a1', 'b1'), ('a2', 'b2'), ('a3', 'b3'), ('a4', 'b4'), ('a5', 'b5'),
+                 |('a6', 'b6'), ('a7', 'b7'), ('a8', 'b8'), ('a9', 'b9'), ('a10', 'b10'),
+                 |('a11', 'b11'), ('a12', null), (null, 'b13'), (null, null)
+                 |""".stripMargin)
+    val sql = "select * from test_filter where (c1, c2) in (('a1', 'b1'), ('a2', 'b2'))"
+    compareResultsAgainstVanillaSpark(sql, true, { _ => })
+    spark.sql("drop table test_filter")
+  }
 }
 // scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index e7de46483eda..4f93d20cf7e4 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -418,23 +418,62 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
             }
 
             DB::DataTypePtr elem_type;
-            std::tie(elem_type, std::ignore) = LiteralParser::parse(options[0].literal());
-            elem_type = wrapNullableType(nullable, elem_type);
-
-            DB::MutableColumnPtr elem_column = elem_type->createColumn();
-            elem_column->reserve(options_len);
-            for (int i = 0; i < options_len; ++i)
+            // Parse every option up front: the final column type is only settled after the
+            // tuple alignment below, so the parsed values are buffered until then.
+            std::vector<std::pair<DB::DataTypePtr, DB::Field>> options_type_and_field;
+            options_type_and_field.reserve(options_len);
+            auto first_option = LiteralParser::parse(options[0].literal());
+            elem_type = first_option.first;
+            options_type_and_field.emplace_back(std::move(first_option));
+            for (int i = 1; i < options_len; ++i)
             {
                 auto type_and_field = LiteralParser::parse(options[i].literal());
-                auto option_type = wrapNullableType(nullable, type_and_field.first);
+                auto option_type = type_and_field.first;
                 if (!elem_type->equals(*option_type))
                     throw DB::Exception(
                         DB::ErrorCodes::LOGICAL_ERROR,
                         "SingularOrList options type mismatch:{} and {}",
                         elem_type->getName(),
                         option_type->getName());
+                options_type_and_field.emplace_back(std::move(type_and_field));
+            }
 
-                elem_column->insert(type_and_field.second);
+            // When both sides are tuples, make each literal element nullable wherever the
+            // corresponding element of args[0]'s result type is nullable.
+            if (isTuple(elem_type) && isTuple(args[0]->result_type))
+            {
+                const auto tuple_type = std::static_pointer_cast<const DB::DataTypeTuple>(elem_type);
+                const auto result_type = std::static_pointer_cast<const DB::DataTypeTuple>(args[0]->result_type);
+                const auto & tuple_elems = tuple_type->getElements();
+                const auto & result_elems = result_type->getElements();
+                // assert() is a no-op when NDEBUG is defined, so reject a size mismatch
+                // explicitly before indexing both element lists in lockstep.
+                if (tuple_elems.size() != result_elems.size())
+                    throw DB::Exception(
+                        DB::ErrorCodes::LOGICAL_ERROR,
+                        "SingularOrList tuple size mismatch:{} and {}",
+                        elem_type->getName(),
+                        args[0]->result_type->getName());
+                DB::DataTypes new_types;
+                new_types.reserve(tuple_elems.size());
+                for (size_t i = 0; i < tuple_elems.size(); ++i)
+                {
+                    const auto & tuple_elem_type = tuple_elems[i];
+                    // Every element must be carried over, wrapped or not; skipping the ones
+                    // that need no wrapping would build a tuple with too few elements.
+                    if (result_elems[i]->isNullable() && !tuple_elem_type->isNullable())
+                        new_types.emplace_back(wrapNullableType(tuple_elem_type));
+                    else
+                        new_types.emplace_back(tuple_elem_type);
+                }
+                elem_type = std::make_shared<DB::DataTypeTuple>(new_types);
+            }
+            elem_type = wrapNullableType(nullable, elem_type);
+            DB::MutableColumnPtr elem_column = elem_type->createColumn();
+            elem_column->reserve(options_len);
+            for (const auto & option : options_type_and_field)
+            {
+                elem_column->insert(option.second);
             }
             auto name = getUniqueName("__set");
             ColumnWithTypeAndName elem_block{std::move(elem_column), elem_type, name};