Skip to content

Commit

Permalink
[GLUTEN-4452] [CH] fix may get wrong hash table when multi joins in a…
Browse files Browse the repository at this point in the history
… task (#4453)

What changes were proposed in this pull request?
This PR fixes a bug where the wrong hash table may be retrieved when a single task contains multiple joins.

(Fixes: #4452)

How was this patch tested?
This patch was tested by manual tests.
  • Loading branch information
shuai-xu authored Feb 23, 2024
1 parent 6febf8a commit 541cb54
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ case class ClickHouseBuildSideRelation(
/** Returns this same instance; no separate copy is created. */
override def asReadOnlyCopy(): ClickHouseBuildSideRelation = {
  this
}

private var hashTableData: Long = 0L

def buildHashTable(
broadCastContext: BroadCastHashJoinContext): (Long, ClickHouseBuildSideRelation) =
synchronized {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,31 @@ class GlutenClickHouseTPCDSParquetGraceHashJoinSuite extends GlutenClickHouseTPC
}
}

// Regression test for GLUTEN-4452: a query with two joins must still match vanilla Spark.
test("Gluten-4452: Fix get wrong hash table when multi joins in a task") {
  // web_sales is joined twice against the same grouped date_dim subquery:
  // a LEFT JOIN keyed on ws_sold_date_sk and an INNER JOIN keyed on ws_ship_date_sk.
  val query =
"""
| SELECT ws_item_sk, ws_sold_date_sk, ws_ship_date_sk,
| t3.d_date_id as sold_date_id, t2.d_date_id as ship_date_id
| FROM (
| SELECT ws_item_sk, ws_sold_date_sk, ws_ship_date_sk, t1.d_date_id
| FROM web_sales
| LEFT JOIN
| (SELECT d_date_id, d_date_sk from date_dim GROUP BY d_date_id, d_date_sk) t1
| ON ws_sold_date_sk == t1.d_date_sk) t3
| INNER JOIN
| (SELECT d_date_id, d_date_sk from date_dim GROUP BY d_date_id, d_date_sk) t2
| ON ws_ship_date_sk == t2.d_date_sk
| LIMIT 100;
|""".stripMargin

  compareResultsAgainstVanillaSpark(
    query,
    true,
    { df =>
      // Gather every ClickHouse broadcast hash join node from the executed plan.
      val broadcastJoins = df.queryExecution.executedPlan.collect {
        case join: CHBroadcastHashJoinExecTransformer => join
      }
      // Both joins in the query are expected to be planned as broadcast hash joins.
      assert(broadcastJoins.length == 2)
    }
  )
}
}
3 changes: 0 additions & 3 deletions cpp-ch/local-engine/Join/StorageJoinFromReadBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ StorageJoinFromReadBuffer::StorageJoinFromReadBuffer(

DB::JoinPtr StorageJoinFromReadBuffer::getJoinLocked(std::shared_ptr<DB::TableJoin> analyzed_join, DB::ContextPtr /*context*/) const
{
if (!analyzed_join->sameStrictnessAndKind(join_->getTableJoin().strictness(), join_->getTableJoin().kind()))
throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "Table {} has incompatible type of JOIN.", storage_metadata_.comment);

if ((analyzed_join->forceNullableRight() && !use_nulls_)
|| (!analyzed_join->forceNullableRight() && isLeftOrFull(analyzed_join->kind()) && use_nulls_))
throw Exception(
Expand Down

0 comments on commit 541cb54

Please sign in to comment.