From a52aecb023c8e7ebe07650689dea983db7a2eaa8 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Fri, 6 Sep 2024 17:45:24 +0800 Subject: [PATCH 1/4] Initial --- .../BroadcastNestedLoopJoinExecTransformer.scala | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala index ae407b3b3efa..f76cca3fe744 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala @@ -162,21 +162,11 @@ abstract class BroadcastNestedLoopJoinExecTransformer( } def validateJoinTypeAndBuildSide(): ValidationResult = { - val result = joinType match { + joinType match { case _: InnerLike | LeftOuter | RightOuter => ValidationResult.succeeded case _ => ValidationResult.failed(s"$joinType join is not supported with BroadcastNestedLoopJoin") } - - if (!result.ok()) { - return result - } - - (joinType, buildSide) match { - case (LeftOuter, BuildLeft) | (RightOuter, BuildRight) => - ValidationResult.failed(s"$joinType join is not supported with $buildSide") - case _ => ValidationResult.succeeded // continue - } } override protected def doValidateInternal(): ValidationResult = { From 23cb69ae08db00295b955576934c62642a52bb96 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Mon, 9 Sep 2024 14:04:49 +0800 Subject: [PATCH 2/4] Fix --- .../execution/BroadcastNestedLoopJoinExecTransformer.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala index f76cca3fe744..fefd7cf0f423 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala @@ -96,6 +96,7 @@ abstract class BroadcastNestedLoopJoinExecTransformer( joinType match { case _: InnerLike => right.outputPartitioning case RightOuter => right.outputPartitioning + case LeftOuter => left.outputPartitioning case x => throw new IllegalArgumentException( s"BroadcastNestedLoopJoin should not take $x as the JoinType with building left side") @@ -104,6 +105,7 @@ abstract class BroadcastNestedLoopJoinExecTransformer( joinType match { case _: InnerLike => left.outputPartitioning case LeftOuter => left.outputPartitioning + case RightOuter => right.outputPartitioning case x => throw new IllegalArgumentException( s"BroadcastNestedLoopJoin should not take $x as the JoinType with building right side") @@ -123,9 +125,7 @@ abstract class BroadcastNestedLoopJoinExecTransformer( val operatorId = context.nextOperatorId(this.nodeName) val joinParams = new JoinParams - if (condition.isDefined) { - joinParams.isWithCondition = true - } + joinParams.isWithCondition = condition.isDefined val crossRel = JoinUtils.createCrossRel( substraitJoinType, From d87bd998492ad9e84f72ca1b7ba8120ff3823f85 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Mon, 9 Sep 2024 17:06:39 +0800 Subject: [PATCH 3/4] Fix wrong join type passed to substrait/velox --- cpp/velox/substrait/SubstraitToVeloxPlan.cc | 3 +++ .../SubstraitToVeloxPlanValidator.cc | 1 + ...oadcastNestedLoopJoinExecTransformer.scala | 2 +- .../apache/gluten/utils/SubstraitUtil.scala | 26 +++++++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 0dab6b280a5e..58fc4020bf2b 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -391,6 +391,9 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: case ::substrait::CrossRel_JoinType::CrossRel_JoinType_JOIN_TYPE_LEFT: joinType = core::JoinType::kLeft; break; + case ::substrait::CrossRel_JoinType::CrossRel_JoinType_JOIN_TYPE_RIGHT: + joinType = core::JoinType::kRight; + break; default: VELOX_NYI("Unsupported Join type: {}", std::to_string(crossRel.type())); } diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index 42d91bd48d12..54021a574494 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -973,6 +973,7 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::CrossRel& crossR switch (crossRel.type()) { case ::substrait::CrossRel_JoinType_JOIN_TYPE_INNER: case ::substrait::CrossRel_JoinType_JOIN_TYPE_LEFT: + case ::substrait::CrossRel_JoinType_JOIN_TYPE_RIGHT: break; default: LOG_VALIDATION_MSG("Unsupported Join type in CrossRel"); diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala index fefd7cf0f423..6c0398fff8f3 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala @@ -49,7 +49,7 @@ abstract class BroadcastNestedLoopJoinExecTransformer( override def rightKeys: Seq[Expression] = Nil private lazy val substraitJoinType: CrossRel.JoinType = - SubstraitUtil.toCrossRelSubstrait(joinType) + SubstraitUtil.toCrossRelSubstrait(joinType, buildSide) // Unique ID for builded table lazy val buildBroadcastTableId: String = "BuiltBNLJBroadcastTable-" + buildPlan.id diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala index c150391434f7..6aeda9cd1e6e 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala @@ -23,6 +23,7 @@ import org.apache.gluten.substrait.SubstraitContext import org.apache.gluten.substrait.expression.ExpressionNode import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans.{FullOuter, InnerLike, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter} import io.substrait.proto.{CrossRel, JoinRel} @@ -49,6 +50,31 @@ object SubstraitUtil { JoinRel.JoinType.UNRECOGNIZED } + def toCrossRelSubstrait(sparkJoin: JoinType, buildSide: BuildSide): CrossRel.JoinType = + sparkJoin match { + case _: InnerLike => + CrossRel.JoinType.JOIN_TYPE_INNER + case LeftOuter => + // since we always assume build right side in substrait, + // the left and right relations are exchanged and the + // join type is reverted. + buildSide match { + case BuildLeft => CrossRel.JoinType.JOIN_TYPE_RIGHT + case BuildRight => CrossRel.JoinType.JOIN_TYPE_LEFT + } + case RightOuter => + buildSide match { + case BuildRight => CrossRel.JoinType.JOIN_TYPE_RIGHT + case BuildLeft => CrossRel.JoinType.JOIN_TYPE_LEFT + } + case LeftSemi => + CrossRel.JoinType.JOIN_TYPE_LEFT_SEMI + case FullOuter => + CrossRel.JoinType.JOIN_TYPE_OUTER + case _ => + CrossRel.JoinType.UNRECOGNIZED + } + def toCrossRelSubstrait(sparkJoin: JoinType): CrossRel.JoinType = sparkJoin match { case _: InnerLike => CrossRel.JoinType.JOIN_TYPE_INNER From d6e117dac64476db2ed9b4535c67391a87aef16b Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Tue, 10 Sep 2024 15:33:46 +0800 Subject: [PATCH 4/4] Refine --- ...BroadcastNestedLoopJoinExecTransformer.scala | 4 ++-- .../org/apache/gluten/utils/SubstraitUtil.scala | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala index 6c0398fff8f3..b7be3d90ca34 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala @@ -49,9 +49,9 @@ abstract class BroadcastNestedLoopJoinExecTransformer( override def rightKeys: Seq[Expression] = Nil private lazy val substraitJoinType: CrossRel.JoinType = - SubstraitUtil.toCrossRelSubstrait(joinType, buildSide) + SubstraitUtil.toCrossRelSubstrait(joinType, needSwitchChildren) - // Unique ID for builded table + // Unique ID for built table. lazy val buildBroadcastTableId: String = "BuiltBNLJBroadcastTable-" + buildPlan.id // Hint substrait to switch the left and right, diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala index 6aeda9cd1e6e..6a678014ebcd 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala @@ -23,7 +23,6 @@ import org.apache.gluten.substrait.SubstraitContext import org.apache.gluten.substrait.expression.ExpressionNode import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} -import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans.{FullOuter, InnerLike, JoinType, LeftAnti, LeftOuter, LeftSemi, RightOuter} import io.substrait.proto.{CrossRel, JoinRel} @@ -50,7 +49,7 @@ object SubstraitUtil { JoinRel.JoinType.UNRECOGNIZED } - def toCrossRelSubstrait(sparkJoin: JoinType, buildSide: BuildSide): CrossRel.JoinType = + def toCrossRelSubstrait(sparkJoin: JoinType, needSwitchChildren: Boolean): CrossRel.JoinType = sparkJoin match { case _: InnerLike => CrossRel.JoinType.JOIN_TYPE_INNER @@ -58,14 +57,16 @@ object SubstraitUtil { // since we always assume build right side in substrait, // the left and right relations are exchanged and the // join type is reverted. - buildSide match { - case BuildLeft => CrossRel.JoinType.JOIN_TYPE_RIGHT - case BuildRight => CrossRel.JoinType.JOIN_TYPE_LEFT + if (needSwitchChildren) { + CrossRel.JoinType.JOIN_TYPE_RIGHT + } else { + CrossRel.JoinType.JOIN_TYPE_LEFT } case RightOuter => - buildSide match { - case BuildRight => CrossRel.JoinType.JOIN_TYPE_RIGHT - case BuildLeft => CrossRel.JoinType.JOIN_TYPE_LEFT + if (needSwitchChildren) { + CrossRel.JoinType.JOIN_TYPE_LEFT + } else { + CrossRel.JoinType.JOIN_TYPE_RIGHT } case LeftSemi => CrossRel.JoinType.JOIN_TYPE_LEFT_SEMI