From 6729158fbe71d35a2bdc31ba457729df170b5873 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 13 Aug 2024 23:43:39 -0700 Subject: [PATCH] feat: add other join types from the official Substrait --- .../substrait/proto/substrait/algebra.proto | 13 ++++++++----- .../execution/SortMergeJoinExecTransformer.scala | 2 +- .../org/apache/gluten/utils/SubstraitUtil.scala | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto b/gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto index 3813de8684453..afcf1c41dabc5 100644 --- a/gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto +++ b/gluten-core/src/main/resources/substrait/proto/substrait/algebra.proto @@ -234,11 +234,11 @@ message JoinRel { JOIN_TYPE_LEFT = 3; JOIN_TYPE_RIGHT = 4; JOIN_TYPE_LEFT_SEMI = 5; - JOIN_TYPE_RIGHT_SEMI = 6; - JOIN_TYPE_ANTI = 7; - // This join is useful for nested sub-queries where we need exactly one tuple in output (or throw exception) - // See Section 3.2 of https://15721.courses.cs.cmu.edu/spring2018/papers/16-optimizer2/hyperjoins-btw2017.pdf - JOIN_TYPE_SINGLE = 8; + JOIN_TYPE_LEFT_ANTI = 6; + JOIN_TYPE_LEFT_SINGLE = 7; + JOIN_TYPE_RIGHT_SEMI = 8; + JOIN_TYPE_RIGHT_ANTI = 9; + JOIN_TYPE_RIGHT_SINGLE = 10; } substrait.extensions.AdvancedExtension advanced_extension = 10; @@ -253,6 +253,7 @@ message CrossRel { JoinType type = 5; + // TODO -- Remove this unnecessary type. enum JoinType { JOIN_TYPE_UNSPECIFIED = 0; JOIN_TYPE_INNER = 1; @@ -649,6 +650,8 @@ message HashJoinRel { JOIN_TYPE_RIGHT_SEMI = 6; JOIN_TYPE_LEFT_ANTI = 7; JOIN_TYPE_RIGHT_ANTI = 8; + JOIN_TYPE_LEFT_SINGLE = 9; + JOIN_TYPE_RIGHT_SINGLE = 10; } substrait.extensions.AdvancedExtension advanced_extension = 10; diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala index c96789569f9ac..5c57e5b62adad 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala @@ -152,7 +152,7 @@ abstract class SortMergeJoinExecTransformerBase( case LeftSemi => JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI case LeftAnti => - JoinRel.JoinType.JOIN_TYPE_ANTI + JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI case _ => // TODO: Support cross join with Cross Rel // TODO: Support existence join diff --git a/gluten-core/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala b/gluten-core/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala index c641cb44891da..c150391434f7b 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/utils/SubstraitUtil.scala @@ -43,7 +43,7 @@ object SubstraitUtil { case LeftSemi => JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI case LeftAnti => - JoinRel.JoinType.JOIN_TYPE_ANTI + JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI case _ => // TODO: Support existence join JoinRel.JoinType.UNRECOGNIZED