Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GLUTEN-8253][CH] Fix cast failed when in-filter with tuple values #8256

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -3350,5 +3350,18 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
compareResultsAgainstVanillaSpark(query_sql, true, { _ => })
spark.sql("drop table test_tbl_7759")
}

test("GLUTEN-8253: Fix cast failed when in-filter with tuple values") {
spark.sql("drop table if exists test_filter")
spark.sql("create table test_filter(c1 string, c2 string) using parquet")
spark.sql(s"""
|insert into test_filter values
|('a1', 'b1'), ('a2', 'b2'), ('a3', 'b3'), ('a4', 'b4'), ('a5', 'b5'),
|('a6', 'b6'), ('a7', 'b7'), ('a8', 'b8'), ('a9', 'b9'), ('a10', 'b10'),
|('a11', 'b11'), ('a12', null), (null, 'b13'), (null, null)
|""".stripMargin)
val sql = "select * from test_filter where (c1, c2) in (('a1', 'b1'), ('a2', 'b2'))"
compareResultsAgainstVanillaSpark(sql, true, { _ => })
}
}
// scalastyle:on line.size.limit
36 changes: 29 additions & 7 deletions cpp-ch/local-engine/Parser/ExpressionParser.cpp
Original file line number Diff line number Diff line change
@@ -418,12 +418,11 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
}

DB::DataTypePtr elem_type;
std::tie(elem_type, std::ignore) = LiteralParser::parse(options[0].literal());
elem_type = wrapNullableType(nullable, elem_type);

DB::MutableColumnPtr elem_column = elem_type->createColumn();
elem_column->reserve(options_len);
for (int i = 0; i < options_len; ++i)
std::vector<std::pair<DB::DataTypePtr, DB::Field>> options_type_and_field;
auto first_option = LiteralParser::parse(options[0].literal());
elem_type = wrapNullableType(nullable, first_option.first);
options_type_and_field.emplace_back(first_option);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better add std::move

for (int i = 1; i < options_len; ++i)
{
auto type_and_field = LiteralParser::parse(options[i].literal());
auto option_type = wrapNullableType(nullable, type_and_field.first);
@@ -433,8 +432,31 @@ const ActionsDAG::Node * ExpressionParser::parseExpression(ActionsDAG & actions_
"SingularOrList options type mismatch:{} and {}",
elem_type->getName(),
option_type->getName());
options_type_and_field.emplace_back(type_and_field);
}

elem_column->insert(type_and_field.second);
// check tuple internal types
if (isTuple(elem_type) && isTuple(args[0]->result_type))
{
// align tuple inner types with nullable
auto tuple_type = std::static_pointer_cast<const DB::DataTypeTuple>(elem_type);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use checkAndGetDataType

auto result_type = std::static_pointer_cast<const DB::DataTypeTuple>(args[0]->result_type);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as above

assert(tuple_type->getElements().size() == result_type->getElements().size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use chassert

DataTypes new_types;
for (int i = 0; i < tuple_type->getElements().size(); ++i)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better to refactor it with castColumn. You could delete dozens of codes after refactor.

  1. Given the type of first argument in in expression
  2. Given the column of second argument in in expression
  3. Cast the column of second argument to the type of first argument.

{
auto tuple_elem_type = tuple_type->getElements()[i];
auto result_elem_type = result_type->getElements()[i];
if (result_elem_type->isNullable() && !tuple_elem_type->isNullable())
new_types.emplace_back(wrapNullableType(tuple_elem_type));
}
elem_type = std::make_shared<DB::DataTypeTuple>(new_types);
}
DB::MutableColumnPtr elem_column = elem_type->createColumn();
elem_column->reserve(options_len);
for (int i = 0; i < options_len; ++i)
{
elem_column->insert(options_type_and_field[i].second);
}
auto name = getUniqueName("__set");
ColumnWithTypeAndName elem_block{std::move(elem_column), elem_type, name};