diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 5dca4ad34714..6feb1e880622 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -3350,5 +3350,18 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
     compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
     spark.sql("drop table test_tbl_7759")
   }
+
+  test("GLUTEN-8253: Fix cast failed when in-filter with tuple values") {
+    spark.sql("drop table if exists test_filter")
+    spark.sql("create table test_filter(c1 string, c2 string) using parquet")
+    spark.sql(s"""
+                 |insert into test_filter values
+                 |('a1', 'b1'), ('a2', 'b2'), ('a3', 'b3'), ('a4', 'b4'), ('a5', 'b5'),
+                 |('a6', 'b6'), ('a7', 'b7'), ('a8', 'b8'), ('a9', 'b9'), ('a10', 'b10'),
+                 |('a11', 'b11'), ('a12', null), (null, 'b13'), (null, null)
+                 |""".stripMargin)
+    val sql = "select * from test_filter where (c1, c2) in (('a1', 'b1'), ('a2', 'b2'))"
+    compareResultsAgainstVanillaSpark(sql, true, { _ => })
+  }
 }
 // scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 91621b793142..ab4d8650d2b3 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -419,12 +419,11 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
         }
 
         DB::DataTypePtr elem_type;
-        std::tie(elem_type, std::ignore) = LiteralParser::parse(options[0].literal());
-        elem_type = wrapNullableType(nullable, elem_type);
-
-        DB::MutableColumnPtr elem_column = elem_type->createColumn();
-        elem_column->reserve(options_len);
-        for (int i = 0; i < options_len; ++i)
+        std::vector<std::pair<DB::DataTypePtr, DB::Field>> options_type_and_field;
+        auto first_option = LiteralParser::parse(options[0].literal());
+        elem_type = wrapNullableType(nullable, first_option.first);
+        options_type_and_field.emplace_back(std::move(first_option));
+        for (int i = 1; i < options_len; ++i)
         {
             auto type_and_field = LiteralParser::parse(options[i].literal());
             auto option_type = wrapNullableType(nullable, type_and_field.first);
@@ -434,8 +433,23 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
                     "SingularOrList options type mismatch:{} and {}",
                     elem_type->getName(),
                     option_type->getName());
+            options_type_and_field.emplace_back(std::move(type_and_field));
+        }
 
-            elem_column->insert(type_and_field.second);
+        // check tuple internal types
+        if (isTuple(elem_type) && isTuple(args[0]->result_type))
+        {
+            // Spark guarantees that the types of tuples in the 'in' filter are completely consistent.
+            // See org.apache.spark.sql.types.DataType#equalsStructurally
+            // Additionally, the mapping from Spark types to ClickHouse types is one-to-one, See TypeParser.cpp
+            // So we can directly use the first tuple type as the type of the tuple to avoid nullable mismatch
+            elem_type = args[0]->result_type;
+        }
+        DB::MutableColumnPtr elem_column = elem_type->createColumn();
+        elem_column->reserve(options_len);
+        for (int i = 0; i < options_len; ++i)
+        {
+            elem_column->insert(options_type_and_field[i].second);
         }
         auto name = getUniqueName("__set");
         ColumnWithTypeAndName elem_block{std::move(elem_column), elem_type, name};