Skip to content

Commit

Permalink
[GLUTEN-8253][CH] Fix cast failed when in-filter with tuple values (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
lwz9103 authored Dec 24, 2024
1 parent f7f801a commit 1d12c4a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3350,5 +3350,18 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
spark.sql("drop table test_tbl_7759")
}

// Regression test for GLUTEN-8253: an IN filter whose left-hand side is a tuple
// `(c1, c2)` and whose options are tuple literals.
// The fixture has two string columns and includes rows where either or both
// columns are null, so the tuple comparison is exercised against null fields.
test("GLUTEN-8253: Fix cast failed when in-filter with tuple values") {
  // Start from a clean slate in case an earlier run left the table behind.
  spark.sql("drop table if exists test_filter")
  spark.sql("create table test_filter(c1 string, c2 string) using parquet")
  // No interpolation is needed here, so a plain triple-quoted literal is used.
  spark.sql("""
              |insert into test_filter values
              |('a1', 'b1'), ('a2', 'b2'), ('a3', 'b3'), ('a4', 'b4'), ('a5', 'b5'),
              |('a6', 'b6'), ('a7', 'b7'), ('a8', 'b8'), ('a9', 'b9'), ('a10', 'b10'),
              |('a11', 'b11'), ('a12', null), (null, 'b13'), (null, null)
              |""".stripMargin)
  val sql = "select * from test_filter where (c1, c2) in (('a1', 'b1'), ('a2', 'b2'))"
  // Runs the query and checks it against vanilla Spark; the trailing callback
  // ignores the DataFrame it receives.
  compareResultsAgainstVanillaSpark(sql, true, { _ => })
  // Drop the fixture table so it does not leak into other tests, matching the
  // cleanup done by the neighbouring test.
  spark.sql("drop table test_filter")
}
}
// scalastyle:on line.size.limit
28 changes: 21 additions & 7 deletions cpp-ch/local-engine/Parser/ExpressionParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,11 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
}

DB::DataTypePtr elem_type;
std::tie(elem_type, std::ignore) = LiteralParser::parse(options[0].literal());
elem_type = wrapNullableType(nullable, elem_type);

DB::MutableColumnPtr elem_column = elem_type->createColumn();
elem_column->reserve(options_len);
for (int i = 0; i < options_len; ++i)
std::vector<std::pair<DB::DataTypePtr, DB::Field>> options_type_and_field;
auto first_option = LiteralParser::parse(options[0].literal());
elem_type = wrapNullableType(nullable, first_option.first);
options_type_and_field.emplace_back(std::move(first_option));
for (int i = 1; i < options_len; ++i)
{
auto type_and_field = LiteralParser::parse(options[i].literal());
auto option_type = wrapNullableType(nullable, type_and_field.first);
Expand All @@ -434,8 +433,23 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
"SingularOrList options type mismatch:{} and {}",
elem_type->getName(),
option_type->getName());
options_type_and_field.emplace_back(std::move(type_and_field));
}

elem_column->insert(type_and_field.second);
// check tuple internal types
if (isTuple(elem_type) && isTuple(args[0]->result_type))
{
// Spark guarantees that the types of tuples in the 'in' filter are completely consistent.
// See org.apache.spark.sql.types.DataType#equalsStructurally
// Additionally, the mapping from Spark types to ClickHouse types is one-to-one, See TypeParser.cpp
// So we can directly use the first tuple type as the type of the tuple to avoid nullable mismatch
elem_type = args[0]->result_type;
}
DB::MutableColumnPtr elem_column = elem_type->createColumn();
elem_column->reserve(options_len);
for (int i = 0; i < options_len; ++i)
{
elem_column->insert(options_type_and_field[i].second);
}
auto name = getUniqueName("__set");
ColumnWithTypeAndName elem_block{std::move(elem_column), elem_type, name};
Expand Down

0 comments on commit 1d12c4a

Please sign in to comment.