From e7ebbb41ac2878d5fb1572774431658017148af8 Mon Sep 17 00:00:00 2001 From: zml1206 Date: Fri, 14 Jun 2024 16:28:36 +0800 Subject: [PATCH] [CORE] Creates vanilla plan when the join operators fall back fix fix fix fix ut fix ut update fix update fix fix fix update update golden file split add cost evaluator fix update update update fix refactor to convert SortMergeJoinExec to ShuffledHashJoinExec update fix fix update fix fix ck ut fix style move drop local sort into RewriteJoin update --- .../backendsapi/clickhouse/CHBackend.scala | 1 - ...ouseTPCHNullableColumnarShuffleSuite.scala | 8 +- .../GlutenClickHouseTPCHNullableSuite.scala | 8 +- .../execution/GlutenClickHouseTPCHSuite.scala | 8 +- .../v1-bhj-ras/spark32/20.txt | 152 +++++---- .../v1-bhj-ras/spark33/20.txt | 152 +++++---- .../tpch-approved-plan/v1-bhj/spark32/20.txt | 152 +++++---- .../tpch-approved-plan/v1-bhj/spark33/20.txt | 152 +++++---- .../tpch-approved-plan/v1-ras/spark32/10.txt | 132 +++++--- .../tpch-approved-plan/v1-ras/spark32/11.txt | 102 +++--- .../tpch-approved-plan/v1-ras/spark32/12.txt | 64 ++-- .../tpch-approved-plan/v1-ras/spark32/13.txt | 70 ++-- .../tpch-approved-plan/v1-ras/spark32/14.txt | 50 +-- .../tpch-approved-plan/v1-ras/spark32/15.txt | 64 ++-- .../tpch-approved-plan/v1-ras/spark32/16.txt | 82 +++-- .../tpch-approved-plan/v1-ras/spark32/17.txt | 89 ++--- .../tpch-approved-plan/v1-ras/spark32/18.txt | 161 +++++---- .../tpch-approved-plan/v1-ras/spark32/19.txt | 50 +-- .../tpch-approved-plan/v1-ras/spark32/20.txt | 204 +++++++----- .../tpch-approved-plan/v1-ras/spark32/21.txt | 187 +++++++---- .../tpch-approved-plan/v1-ras/spark32/22.txt | 56 ++-- .../tpch-approved-plan/v1-ras/spark32/3.txt | 92 ++++-- .../tpch-approved-plan/v1-ras/spark32/4.txt | 66 ++-- .../tpch-approved-plan/v1-ras/spark32/5.txt | 204 +++++++----- .../tpch-approved-plan/v1-ras/spark32/7.txt | 196 +++++++---- .../tpch-approved-plan/v1-ras/spark32/8.txt | 270 +++++++++------ .../tpch-approved-plan/v1-ras/spark32/9.txt | 198 ++++++----- .../tpch-approved-plan/v1-ras/spark33/10.txt | 132 +++++--- .../tpch-approved-plan/v1-ras/spark33/11.txt | 312 ++++++++++-------- .../tpch-approved-plan/v1-ras/spark33/12.txt | 64 ++-- .../tpch-approved-plan/v1-ras/spark33/13.txt | 70 ++-- .../tpch-approved-plan/v1-ras/spark33/14.txt | 50 +-- .../tpch-approved-plan/v1-ras/spark33/15.txt | 162 ++++----- .../tpch-approved-plan/v1-ras/spark33/16.txt | 82 +++-- .../tpch-approved-plan/v1-ras/spark33/17.txt | 89 ++--- .../tpch-approved-plan/v1-ras/spark33/18.txt | 161 +++++---- .../tpch-approved-plan/v1-ras/spark33/19.txt | 50 +-- .../tpch-approved-plan/v1-ras/spark33/20.txt | 204 +++++++----- .../tpch-approved-plan/v1-ras/spark33/21.txt | 187 +++++++---- .../tpch-approved-plan/v1-ras/spark33/22.txt | 130 ++++---- .../tpch-approved-plan/v1-ras/spark33/3.txt | 92 ++++-- .../tpch-approved-plan/v1-ras/spark33/4.txt | 66 ++-- .../tpch-approved-plan/v1-ras/spark33/5.txt | 204 +++++++----- .../tpch-approved-plan/v1-ras/spark33/7.txt | 196 +++++++---- .../tpch-approved-plan/v1-ras/spark33/8.txt | 270 +++++++++------ .../tpch-approved-plan/v1-ras/spark33/9.txt | 198 ++++++----- .../tpch-approved-plan/v1-ras/spark34/10.txt | 132 +++++--- .../tpch-approved-plan/v1-ras/spark34/11.txt | 312 ++++++++++-------- .../tpch-approved-plan/v1-ras/spark34/12.txt | 64 ++-- .../tpch-approved-plan/v1-ras/spark34/13.txt | 70 ++-- .../tpch-approved-plan/v1-ras/spark34/14.txt | 50 +-- .../tpch-approved-plan/v1-ras/spark34/15.txt | 162 ++++----- .../tpch-approved-plan/v1-ras/spark34/16.txt | 82 +++-- .../tpch-approved-plan/v1-ras/spark34/17.txt | 89 ++--- .../tpch-approved-plan/v1-ras/spark34/18.txt | 161 +++++---- .../tpch-approved-plan/v1-ras/spark34/19.txt | 50 +-- .../tpch-approved-plan/v1-ras/spark34/20.txt | 204 +++++++----- .../tpch-approved-plan/v1-ras/spark34/21.txt | 187 +++++++---- .../tpch-approved-plan/v1-ras/spark34/22.txt | 130 ++++---- .../tpch-approved-plan/v1-ras/spark34/3.txt | 92 ++++-- .../tpch-approved-plan/v1-ras/spark34/4.txt | 66 ++-- .../tpch-approved-plan/v1-ras/spark34/5.txt | 204 +++++++----- .../tpch-approved-plan/v1-ras/spark34/7.txt | 196 +++++++---- .../tpch-approved-plan/v1-ras/spark34/8.txt | 270 +++++++++------ .../tpch-approved-plan/v1-ras/spark34/9.txt | 198 ++++++----- .../tpch-approved-plan/v1/spark32/10.txt | 132 +++++--- .../tpch-approved-plan/v1/spark32/11.txt | 102 +++--- .../tpch-approved-plan/v1/spark32/12.txt | 64 ++-- .../tpch-approved-plan/v1/spark32/13.txt | 70 ++-- .../tpch-approved-plan/v1/spark32/14.txt | 50 +-- .../tpch-approved-plan/v1/spark32/15.txt | 64 ++-- .../tpch-approved-plan/v1/spark32/16.txt | 82 +++-- .../tpch-approved-plan/v1/spark32/17.txt | 89 ++--- .../tpch-approved-plan/v1/spark32/18.txt | 161 +++++---- .../tpch-approved-plan/v1/spark32/19.txt | 50 +-- .../tpch-approved-plan/v1/spark32/20.txt | 204 +++++++----- .../tpch-approved-plan/v1/spark32/21.txt | 187 +++++++---- .../tpch-approved-plan/v1/spark32/22.txt | 56 ++-- .../tpch-approved-plan/v1/spark32/3.txt | 92 ++++-- .../tpch-approved-plan/v1/spark32/4.txt | 66 ++-- .../tpch-approved-plan/v1/spark32/5.txt | 204 +++++++----- .../tpch-approved-plan/v1/spark32/7.txt | 196 +++++++---- .../tpch-approved-plan/v1/spark32/8.txt | 270 +++++++++------ .../tpch-approved-plan/v1/spark32/9.txt | 198 ++++++----- .../tpch-approved-plan/v1/spark33/10.txt | 132 +++++--- .../tpch-approved-plan/v1/spark33/11.txt | 312 ++++++++++-------- .../tpch-approved-plan/v1/spark33/12.txt | 64 ++-- .../tpch-approved-plan/v1/spark33/13.txt | 70 ++-- .../tpch-approved-plan/v1/spark33/14.txt | 50 +-- .../tpch-approved-plan/v1/spark33/15.txt | 162 ++++----- .../tpch-approved-plan/v1/spark33/16.txt | 82 +++-- .../tpch-approved-plan/v1/spark33/17.txt | 89 ++--- .../tpch-approved-plan/v1/spark33/18.txt | 161 +++++---- .../tpch-approved-plan/v1/spark33/19.txt | 50 +-- .../tpch-approved-plan/v1/spark33/20.txt | 204 +++++++----- .../tpch-approved-plan/v1/spark33/21.txt | 187 +++++++---- .../tpch-approved-plan/v1/spark33/22.txt | 164 ++++----- .../tpch-approved-plan/v1/spark33/3.txt | 92 ++++-- .../tpch-approved-plan/v1/spark33/4.txt | 66 ++-- .../tpch-approved-plan/v1/spark33/5.txt | 204 +++++++----- .../tpch-approved-plan/v1/spark33/7.txt | 196 +++++++---- .../tpch-approved-plan/v1/spark33/8.txt | 270 +++++++++------ .../tpch-approved-plan/v1/spark33/9.txt | 198 ++++++----- .../tpch-approved-plan/v1/spark34/10.txt | 132 +++++--- .../tpch-approved-plan/v1/spark34/11.txt | 312 ++++++++++-------- .../tpch-approved-plan/v1/spark34/12.txt | 64 ++-- .../tpch-approved-plan/v1/spark34/13.txt | 70 ++-- .../tpch-approved-plan/v1/spark34/14.txt | 50 +-- .../tpch-approved-plan/v1/spark34/15.txt | 162 ++++----- .../tpch-approved-plan/v1/spark34/16.txt | 82 +++-- .../tpch-approved-plan/v1/spark34/17.txt | 89 ++--- .../tpch-approved-plan/v1/spark34/18.txt | 161 +++++---- .../tpch-approved-plan/v1/spark34/19.txt | 50 +-- .../tpch-approved-plan/v1/spark34/20.txt | 204 +++++++----- .../tpch-approved-plan/v1/spark34/21.txt | 187 +++++++---- .../tpch-approved-plan/v1/spark34/22.txt | 164 ++++----- .../tpch-approved-plan/v1/spark34/3.txt | 92 ++++-- .../tpch-approved-plan/v1/spark34/4.txt | 66 ++-- .../tpch-approved-plan/v1/spark34/5.txt | 204 +++++++----- .../tpch-approved-plan/v1/spark34/7.txt | 196 +++++++---- .../tpch-approved-plan/v1/spark34/8.txt | 270 +++++++++------ .../tpch-approved-plan/v1/spark34/9.txt | 198 ++++++----- .../gluten/execution/FallbackSuite.scala | 23 ++ .../org/apache/gluten/GlutenPlugin.scala | 3 +- .../backendsapi/BackendSettingsApi.scala | 1 - .../apache/gluten/execution/SortUtils.scala | 33 +- .../gluten/extension/StrategyOverrides.scala | 213 ------------ .../columnar/OffloadSingleNode.scala | 29 +- .../columnar/TransformHintRule.scala | 34 +- .../columnar/rewrite/RewriteJoin.scala | 63 ++++ .../columnar/rewrite/RewriteSingleNode.scala | 2 +- .../RewriteSparkPlanRulesManager.scala | 11 +- .../utils/velox/VeloxTestSettings.scala | 1 - .../joins/GlutenBroadcastJoinSuite.scala | 19 -- .../GlutenSessionExtensionSuite.scala | 3 +- .../utils/velox/VeloxTestSettings.scala | 1 - .../GlutenReplaceHashWithSortAggSuite.scala | 8 +- .../GlutenSessionExtensionSuite.scala | 3 +- .../utils/velox/VeloxTestSettings.scala | 4 - .../apache/spark/sql/GlutenJoinSuite.scala | 13 - .../GlutenReplaceHashWithSortAggSuite.scala | 8 +- .../GlutenSessionExtensionSuite.scala | 3 +- .../utils/velox/VeloxTestSettings.scala | 4 - .../apache/spark/sql/GlutenJoinSuite.scala | 13 - .../GlutenReplaceHashWithSortAggSuite.scala | 8 +- .../GlutenSessionExtensionSuite.scala | 3 +- .../org/apache/gluten/GlutenConfig.scala | 10 - .../sql/execution/JoinSelectionShim.scala | 51 --- .../sql/execution/JoinSelectionShim.scala | 51 --- .../sql/execution/JoinSelectionShim.scala | 51 --- 150 files changed, 9890 insertions(+), 7027 deletions(-) rename shims/spark32/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala => gluten-core/src/main/scala/org/apache/gluten/execution/SortUtils.scala (53%) delete mode 100644 gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala create mode 100644 gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala delete mode 100644 shims/spark33/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala delete mode 100644 shims/spark34/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala delete mode 100644 shims/spark35/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala index 1587b9ea3488..cdca1b031a91 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala @@ -189,7 +189,6 @@ object CHBackendSettings extends BackendSettingsApi with Logging { } } - override def utilizeShuffledHashJoinHint(): Boolean = true override def supportShuffleWithProject( outputPartitioning: Partitioning, child: SparkPlan): Boolean = { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala index 245b52d37109..c5f67f45d577 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala @@ -17,7 +17,7 @@ package org.apache.gluten.execution import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.optimizer.BuildLeft +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} class GlutenClickHouseTPCHNullableColumnarShuffleSuite extends GlutenClickHouseTPCHAbstractSuite { @@ -63,7 +63,11 @@ class GlutenClickHouseTPCHNullableColumnarShuffleSuite extends GlutenClickHouseT val shjBuildLeft = df.queryExecution.executedPlan.collect { case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildLeft => shj } - assert(shjBuildLeft.size == 2) + assert(shjBuildLeft.size == 1) + val shjBuildRight = df.queryExecution.executedPlan.collect { + case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildRight => shj + } + assert(shjBuildRight.size == 1) } } } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala index 0eb4de74209b..7f62c6993157 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala @@ -20,7 +20,7 @@ import org.apache.gluten.GlutenConfig import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.expressions.Alias -import org.apache.spark.sql.catalyst.optimizer.BuildLeft +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuite { @@ -66,7 +66,11 @@ class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuit val shjBuildLeft = df.queryExecution.executedPlan.collect { case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildLeft => shj } - assert(shjBuildLeft.size == 2) + assert(shjBuildLeft.size == 1) + val shjBuildRight = df.queryExecution.executedPlan.collect { + case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildRight => shj + } + assert(shjBuildRight.size == 1) } } } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala index 96b2cb09b163..d26891ddb1ea 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala @@ -18,7 +18,7 @@ package org.apache.gluten.execution import org.apache.spark.SparkConf import org.apache.spark.sql.{Row, TestUtils} -import org.apache.spark.sql.catalyst.optimizer.BuildLeft +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.types.{DecimalType, StructType} // Some sqls' line length exceeds 100 @@ -73,7 +73,11 @@ class GlutenClickHouseTPCHSuite extends GlutenClickHouseTPCHAbstractSuite { val shjBuildLeft = df.queryExecution.executedPlan.collect { case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildLeft => shj } - assert(shjBuildLeft.size == 2) + assert(shjBuildLeft.size == 1) + val shjBuildRight = df.queryExecution.executedPlan.collect { + case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildRight => shj + } + assert(shjBuildRight.size == 1) } } } diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt index 6ebe36be3494..a1f1bb51cb98 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (112) +- == Final Plan == VeloxColumnarToRowExec (73) +- ^ SortExecTransformer (71) @@ -59,42 +59,44 @@ AdaptiveSparkPlan (110) +- ^ NoopFilter (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (109) - +- Exchange (108) - +- Project (107) - +- BroadcastHashJoin Inner BuildRight (106) - :- Project (101) - : +- ShuffledHashJoin LeftSemi BuildRight (100) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Exchange (99) - : +- Project (98) - : +- BroadcastHashJoin Inner BuildLeft (97) - : :- BroadcastExchange (84) - : : +- BroadcastHashJoin LeftSemi BuildRight (83) - : : :- Filter (78) - : : : +- Scan parquet (77) - : : +- BroadcastExchange (82) - : : +- Project (81) - : : +- Filter (80) - : : +- Scan parquet (79) - : +- Filter (96) - : +- HashAggregate (95) - : +- Exchange (94) - : +- HashAggregate (93) - : +- BroadcastHashJoin LeftSemi BuildRight (92) - : :- Project (87) - : : +- Filter (86) - : : +- Scan parquet (85) - : +- BroadcastExchange (91) - : +- Project (90) - : +- Filter (89) - : +- Scan parquet (88) - +- BroadcastExchange (105) - +- Project (104) - +- Filter (103) - +- Scan parquet (102) + Sort (111) + +- Exchange (110) + +- Project (109) + +- BroadcastHashJoin Inner BuildRight (108) + :- Project (103) + : +- SortMergeJoin LeftSemi (102) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (101) + : +- Exchange (100) + : +- Project (99) + : +- BroadcastHashJoin Inner BuildLeft (98) + : :- BroadcastExchange (85) + : : +- BroadcastHashJoin LeftSemi BuildRight (84) + : : :- Filter (79) + : : : +- Scan parquet (78) + : : +- BroadcastExchange (83) + : : +- Project (82) + : : +- Filter (81) + : : +- Scan parquet (80) + : +- Filter (97) + : +- HashAggregate (96) + : +- Exchange (95) + : +- HashAggregate (94) + : +- BroadcastHashJoin LeftSemi BuildRight (93) + : :- Project (88) + : : +- Filter (87) + : : +- Scan parquet (86) + : +- BroadcastExchange (92) + : +- Project (91) + : +- Filter (90) + : +- Scan parquet (89) + +- BroadcastExchange (107) + +- Project (106) + +- Filter (105) + +- Scan parquet (104) (1) Scan parquet @@ -412,164 +414,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(77) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(78) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(78) Filter +(79) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(79) Scan parquet +(80) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(80) Filter +(81) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(81) Project +(82) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(82) BroadcastExchange +(83) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(83) BroadcastHashJoin +(84) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(84) BroadcastExchange +(85) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(85) Scan parquet +(86) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(86) Filter +(87) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(87) Project +(88) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(88) Scan parquet +(89) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(89) Filter +(90) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(90) Project +(91) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(91) BroadcastExchange +(92) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(92) BroadcastHashJoin +(93) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(93) HashAggregate +(94) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(94) Exchange +(95) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) HashAggregate +(96) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Filter +(97) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(97) BroadcastHashJoin +(98) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(98) Project +(99) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(99) Exchange +(100) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) ShuffledHashJoin +(101) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(102) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(101) Project +(103) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(102) Scan parquet +(104) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(103) Filter +(105) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(104) Project +(106) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(105) BroadcastExchange +(107) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(106) BroadcastHashJoin +(108) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(107) Project +(109) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(108) Exchange +(110) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(111) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(110) AdaptiveSparkPlan +(112) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt index 27e59afbb7fc..4a899ae239be 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (107) +AdaptiveSparkPlan (109) +- == Final Plan == VeloxColumnarToRowExec (70) +- AQEShuffleRead (69) @@ -58,42 +58,44 @@ AdaptiveSparkPlan (107) +- ^ NoopFilter (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (106) - +- Exchange (105) - +- Project (104) - +- BroadcastHashJoin Inner BuildRight (103) - :- Project (98) - : +- ShuffledHashJoin LeftSemi BuildRight (97) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (96) - : +- Project (95) - : +- BroadcastHashJoin Inner BuildLeft (94) - : :- BroadcastExchange (81) - : : +- BroadcastHashJoin LeftSemi BuildRight (80) - : : :- Filter (75) - : : : +- Scan parquet (74) - : : +- BroadcastExchange (79) - : : +- Project (78) - : : +- Filter (77) - : : +- Scan parquet (76) - : +- Filter (93) - : +- HashAggregate (92) - : +- Exchange (91) - : +- HashAggregate (90) - : +- BroadcastHashJoin LeftSemi BuildRight (89) - : :- Project (84) - : : +- Filter (83) - : : +- Scan parquet (82) - : +- BroadcastExchange (88) - : +- Project (87) - : +- Filter (86) - : +- Scan parquet (85) - +- BroadcastExchange (102) - +- Project (101) - +- Filter (100) - +- Scan parquet (99) + Sort (108) + +- Exchange (107) + +- Project (106) + +- BroadcastHashJoin Inner BuildRight (105) + :- Project (100) + : +- SortMergeJoin LeftSemi (99) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (98) + : +- Exchange (97) + : +- Project (96) + : +- BroadcastHashJoin Inner BuildLeft (95) + : :- BroadcastExchange (82) + : : +- BroadcastHashJoin LeftSemi BuildRight (81) + : : :- Filter (76) + : : : +- Scan parquet (75) + : : +- BroadcastExchange (80) + : : +- Project (79) + : : +- Filter (78) + : : +- Scan parquet (77) + : +- Filter (94) + : +- HashAggregate (93) + : +- Exchange (92) + : +- HashAggregate (91) + : +- BroadcastHashJoin LeftSemi BuildRight (90) + : :- Project (85) + : : +- Filter (84) + : : +- Scan parquet (83) + : +- BroadcastExchange (89) + : +- Project (88) + : +- Filter (87) + : +- Scan parquet (86) + +- BroadcastExchange (104) + +- Project (103) + +- Filter (102) + +- Scan parquet (101) (1) Scan parquet @@ -401,164 +403,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(76) Scan parquet +(77) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(77) Filter +(78) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(78) Project +(79) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(79) BroadcastExchange +(80) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(80) BroadcastHashJoin +(81) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(81) BroadcastExchange +(82) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(82) Scan parquet +(83) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(83) Filter +(84) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(84) Project +(85) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(85) Scan parquet +(86) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(86) Filter +(87) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(87) Project +(88) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(88) BroadcastExchange +(89) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(89) BroadcastHashJoin +(90) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(90) HashAggregate +(91) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(91) Exchange +(92) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(93) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(93) Filter +(94) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(94) BroadcastHashJoin +(95) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(95) Project +(96) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Exchange +(97) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(98) Project +(100) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(99) Scan parquet +(101) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(100) Filter +(102) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(101) Project +(103) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(102) BroadcastExchange +(104) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(103) BroadcastHashJoin +(105) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(104) Project +(106) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(105) Exchange +(107) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(108) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(107) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt index 7b494469aacc..9e03d8319537 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (112) +- == Final Plan == VeloxColumnarToRowExec (73) +- ^ SortExecTransformer (71) @@ -59,42 +59,44 @@ AdaptiveSparkPlan (110) +- ^ FilterExecTransformer (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (109) - +- Exchange (108) - +- Project (107) - +- BroadcastHashJoin Inner BuildRight (106) - :- Project (101) - : +- ShuffledHashJoin LeftSemi BuildRight (100) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Exchange (99) - : +- Project (98) - : +- BroadcastHashJoin Inner BuildLeft (97) - : :- BroadcastExchange (84) - : : +- BroadcastHashJoin LeftSemi BuildRight (83) - : : :- Filter (78) - : : : +- Scan parquet (77) - : : +- BroadcastExchange (82) - : : +- Project (81) - : : +- Filter (80) - : : +- Scan parquet (79) - : +- Filter (96) - : +- HashAggregate (95) - : +- Exchange (94) - : +- HashAggregate (93) - : +- BroadcastHashJoin LeftSemi BuildRight (92) - : :- Project (87) - : : +- Filter (86) - : : +- Scan parquet (85) - : +- BroadcastExchange (91) - : +- Project (90) - : +- Filter (89) - : +- Scan parquet (88) - +- BroadcastExchange (105) - +- Project (104) - +- Filter (103) - +- Scan parquet (102) + Sort (111) + +- Exchange (110) + +- Project (109) + +- BroadcastHashJoin Inner BuildRight (108) + :- Project (103) + : +- SortMergeJoin LeftSemi (102) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (101) + : +- Exchange (100) + : +- Project (99) + : +- BroadcastHashJoin Inner BuildLeft (98) + : :- BroadcastExchange (85) + : : +- BroadcastHashJoin LeftSemi BuildRight (84) + : : :- Filter (79) + : : : +- Scan parquet (78) + : : +- BroadcastExchange (83) + : : +- Project (82) + : : +- Filter (81) + : : +- Scan parquet (80) + : +- Filter (97) + : +- HashAggregate (96) + : +- Exchange (95) + : +- HashAggregate (94) + : +- BroadcastHashJoin LeftSemi BuildRight (93) + : :- Project (88) + : : +- Filter (87) + : : +- Scan parquet (86) + : +- BroadcastExchange (92) + : +- Project (91) + : +- Filter (90) + : +- Scan parquet (89) + +- BroadcastExchange (107) + +- Project (106) + +- Filter (105) + +- Scan parquet (104) (1) Scan parquet @@ -412,164 +414,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(77) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(78) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(78) Filter +(79) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(79) Scan parquet +(80) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(80) Filter +(81) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(81) Project +(82) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(82) BroadcastExchange +(83) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(83) BroadcastHashJoin +(84) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(84) BroadcastExchange +(85) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(85) Scan parquet +(86) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(86) Filter +(87) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(87) Project +(88) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(88) Scan parquet +(89) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(89) Filter +(90) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(90) Project +(91) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(91) BroadcastExchange +(92) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(92) BroadcastHashJoin +(93) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(93) HashAggregate +(94) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(94) Exchange +(95) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) HashAggregate +(96) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Filter +(97) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(97) BroadcastHashJoin +(98) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(98) Project +(99) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(99) Exchange +(100) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) ShuffledHashJoin +(101) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(102) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(101) Project +(103) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(102) Scan parquet +(104) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(103) Filter +(105) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(104) Project +(106) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(105) BroadcastExchange +(107) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(106) BroadcastHashJoin +(108) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(107) Project +(109) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(108) Exchange +(110) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(111) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(110) AdaptiveSparkPlan +(112) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt index a0edead7013d..5cd3c9d35c2c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (107) +AdaptiveSparkPlan (109) +- == Final Plan == VeloxColumnarToRowExec (70) +- AQEShuffleRead (69) @@ -58,42 +58,44 @@ AdaptiveSparkPlan (107) +- ^ FilterExecTransformer (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (106) - +- Exchange (105) - +- Project (104) - +- BroadcastHashJoin Inner BuildRight (103) - :- Project (98) - : +- ShuffledHashJoin LeftSemi BuildRight (97) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (96) - : +- Project (95) - : +- BroadcastHashJoin Inner BuildLeft (94) - : :- BroadcastExchange (81) - : : +- BroadcastHashJoin LeftSemi BuildRight (80) - : : :- Filter (75) - : : : +- Scan parquet (74) - : : +- BroadcastExchange (79) - : : +- Project (78) - : : +- Filter (77) - : : +- Scan parquet (76) - : +- Filter (93) - : +- HashAggregate (92) - : +- Exchange (91) - : +- HashAggregate (90) - : +- BroadcastHashJoin LeftSemi BuildRight (89) - : :- Project (84) - : : +- Filter (83) - : : +- Scan parquet (82) - : +- BroadcastExchange (88) - : +- Project (87) - : +- Filter (86) - : +- Scan parquet (85) - +- BroadcastExchange (102) - +- Project (101) - +- Filter (100) - +- Scan parquet (99) + Sort (108) + +- Exchange (107) + +- Project (106) + +- BroadcastHashJoin Inner BuildRight (105) + :- Project (100) + : +- SortMergeJoin LeftSemi (99) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (98) + : +- Exchange (97) + : +- Project (96) + : +- BroadcastHashJoin Inner BuildLeft (95) + : :- BroadcastExchange (82) + : : +- BroadcastHashJoin LeftSemi BuildRight (81) + : : :- Filter (76) + : : : +- Scan parquet (75) + : : +- BroadcastExchange (80) + : : +- Project (79) + : : +- Filter (78) + : : +- Scan parquet (77) + : +- Filter (94) + : +- HashAggregate (93) + : +- Exchange (92) + : +- HashAggregate (91) + : +- BroadcastHashJoin LeftSemi BuildRight (90) + : :- Project (85) + : : +- Filter (84) + : : +- Scan parquet (83) + : +- BroadcastExchange (89) + : +- Project (88) + : +- Filter (87) + : +- Scan parquet (86) + +- BroadcastExchange (104) + +- Project (103) + +- Filter (102) + +- Scan parquet (101) (1) Scan parquet @@ -401,164 +403,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(76) Scan parquet +(77) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(77) Filter +(78) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(78) Project +(79) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(79) BroadcastExchange +(80) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(80) BroadcastHashJoin +(81) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(81) BroadcastExchange +(82) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(82) Scan parquet +(83) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(83) Filter +(84) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(84) Project +(85) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(85) Scan parquet +(86) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(86) Filter +(87) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(87) Project +(88) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(88) BroadcastExchange +(89) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(89) BroadcastHashJoin +(90) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(90) HashAggregate +(91) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(91) Exchange +(92) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(93) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(93) Filter +(94) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(94) BroadcastHashJoin +(95) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(95) Project +(96) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Exchange +(97) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(98) Project +(100) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(99) Scan parquet +(101) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(100) Filter +(102) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(101) Project +(103) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(102) BroadcastExchange +(104) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(103) BroadcastHashJoin +(105) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(104) Project +(106) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(105) Exchange +(107) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(108) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(107) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt index 85176d8c6011..ec46bfd07b91 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ NoopFilter (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt index 7c749c0a5ec6..cccf1408bea9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,92 +315,108 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt index 5cf27c6e0cb9..17cdf62608cc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt index d3904d8d079e..730f0e0a438a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ NoopFilter (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt index 00b5fb4142f3..55111a31f874 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt index eab0e2908a10..db2df6c87544 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (48) +AdaptiveSparkPlan (50) +- == Final Plan == VeloxColumnarToRowExec (33) +- ^ SortExecTransformer (31) @@ -28,20 +28,22 @@ AdaptiveSparkPlan (48) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (47) - +- Exchange (46) - +- Project (45) - +- ShuffledHashJoin Inner BuildLeft (44) - :- Exchange (36) - : +- Filter (35) - : +- Scan parquet (34) - +- Filter (43) - +- HashAggregate (42) - +- Exchange (41) - +- HashAggregate (40) - +- Project (39) - +- Filter (38) - +- Scan parquet (37) + Sort (49) + +- Exchange (48) + +- Project (47) + +- SortMergeJoin Inner (46) + :- Sort (37) + : +- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Sort (45) + +- Filter (44) + +- HashAggregate (43) + +- Exchange (42) + +- HashAggregate (41) + +- Project (40) + +- Filter (39) + +- Scan parquet (38) (1) Scan parquet @@ -197,60 +199,68 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(37) Scan parquet +(37) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(38) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(38) Filter +(39) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(39) Project +(40) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(40) HashAggregate +(41) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(41) Exchange +(42) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) HashAggregate +(43) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS total_revenue#X] -(43) Filter +(44) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(44) ShuffledHashJoin +(45) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(45) Project +(47) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(46) Exchange +(48) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) Sort +(49) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(48) AdaptiveSparkPlan +(50) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt index 354bd4f3fabd..2eb5668906ba 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt index 848d4e2ce4f8..5226aacff753 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ NoopFilter (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt index 3d4743403809..c1287b2d685a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt index 6ec9ae965ee9..21e4f472f3b3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt index 24be4842e1b8..1ac0992834eb 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (146) +- == Final Plan == VeloxColumnarToRowExec (96) +- ^ SortExecTransformer (94) @@ -76,45 +76,55 @@ AdaptiveSparkPlan (136) +- ^ NoopFilter (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (135) - +- Exchange (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (127) - : +- Project (126) - : +- ShuffledHashJoin LeftSemi BuildRight (125) - : :- Exchange (99) - : : +- Filter (98) - : : +- Scan parquet (97) - : +- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin Inner BuildLeft (122) - : :- Exchange (108) - : : +- ShuffledHashJoin LeftSemi BuildRight (107) - : : :- Exchange (102) - : : : +- Filter (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (121) - : +- Filter (120) - : +- HashAggregate (119) - : +- HashAggregate (118) - : +- ShuffledHashJoin LeftSemi BuildRight (117) - : :- Exchange (112) - : : +- Project (111) - : : +- Filter (110) - : : +- Scan parquet (109) - : +- Exchange (116) - : +- Project (115) - : +- Filter (114) - : +- Scan parquet (113) - +- Exchange (131) - +- Project (130) - +- Filter (129) - +- Scan parquet (128) + Sort (145) + +- Exchange (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (136) + : +- Exchange (135) + : +- Project (134) + : +- SortMergeJoin LeftSemi (133) + : :- Sort (100) + : : +- Exchange (99) + : : +- Filter (98) + : : +- Scan parquet (97) + : +- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin Inner (129) + : :- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftSemi (110) + : : :- Sort (104) + : : : +- Exchange (103) + : : : +- Filter (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (128) + : +- Exchange (127) + : +- Filter (126) + : +- HashAggregate (125) + : +- HashAggregate (124) + : +- SortMergeJoin LeftSemi (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- Filter (114) + : : +- Scan parquet (113) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (141) + +- Exchange (140) + +- Project (139) + +- Filter (138) + +- Scan parquet (137) (1) Scan parquet @@ -518,176 +528,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(100) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(101) Filter +(102) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(102) Exchange +(103) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) Scan parquet +(104) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(105) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(104) Filter +(106) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(105) Project +(107) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(106) Exchange +(108) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(109) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) Exchange +(111) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Scan parquet +(112) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(110) Filter +(114) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(111) Project +(115) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(112) Exchange +(116) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(113) Scan parquet +(117) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(114) Filter +(119) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(115) Project +(120) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(116) Exchange +(121) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(118) HashAggregate +(124) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(119) HashAggregate +(125) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(120) Filter +(126) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(121) Exchange +(127) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(128) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(129) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(123) Project +(130) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(124) Exchange +(131) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) ShuffledHashJoin +(132) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(126) Project +(134) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(127) Exchange +(135) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(128) Scan parquet +(136) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(137) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(129) Filter +(138) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(130) Project +(139) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(131) Exchange +(140) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(143) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(134) Exchange +(144) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Sort +(145) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(136) AdaptiveSparkPlan +(146) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt index b8c363fce329..e2a72528c4ed 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (129) +AdaptiveSparkPlan (138) +- == Final Plan == VeloxColumnarToRowExec (92) +- TakeOrderedAndProjectExecTransformer (91) @@ -73,42 +73,51 @@ AdaptiveSparkPlan (129) +- ^ NoopFilter (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (128) - +- HashAggregate (127) - +- Exchange (126) - +- HashAggregate (125) - +- Project (124) - +- ShuffledHashJoin Inner BuildRight (123) - :- Exchange (118) - : +- Project (117) - : +- ShuffledHashJoin Inner BuildRight (116) - : :- Exchange (111) - : : +- Project (110) - : : +- ShuffledHashJoin Inner BuildLeft (109) - : : :- Exchange (95) - : : : +- Filter (94) - : : : +- Scan parquet (93) - : : +- Exchange (108) - : : +- ShuffledHashJoin LeftAnti BuildRight (107) - : : :- ShuffledHashJoin LeftSemi BuildRight (102) - : : : :- Exchange (99) - : : : : +- Project (98) - : : : : +- Filter (97) - : : : : +- Scan parquet (96) - : : : +- Exchange (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (115) - : +- Project (114) - : +- Filter (113) - : +- Scan parquet (112) - +- Exchange (122) - +- Project (121) - +- Filter (120) - +- Scan parquet (119) + TakeOrderedAndProject (137) + +- HashAggregate (136) + +- Exchange (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- SortMergeJoin Inner (114) + : : :- Sort (96) + : : : +- Exchange (95) + : : : +- Filter (94) + : : : +- Scan parquet (93) + : : +- Sort (113) + : : +- Exchange (112) + : : +- SortMergeJoin LeftAnti (111) + : : :- SortMergeJoin LeftSemi (105) + : : : :- Sort (101) + : : : : +- Exchange (100) + : : : : +- Project (99) + : : : : +- Filter (98) + : : : : +- Scan parquet (97) + : : : +- Sort (104) + : : : +- Exchange (103) + : : : +- Scan parquet (102) + : : +- Sort (110) + : : +- Exchange (109) + : : +- Project (108) + : : +- Filter (107) + : : +- Scan parquet (106) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) (1) Scan parquet @@ -501,163 +510,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Scan parquet +(96) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(97) Filter +(98) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(98) Project +(99) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(99) Exchange +(100) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(101) Exchange +(103) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(104) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(105) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(103) Scan parquet +(106) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(104) Filter +(107) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(105) Project +(108) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(106) Exchange +(109) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(110) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(111) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(108) Exchange +(112) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(113) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(114) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(110) Project +(115) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(111) Exchange +(116) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(117) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(113) Filter +(119) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(114) Project +(120) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(115) Exchange +(121) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) ShuffledHashJoin +(122) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(117) Project +(124) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(118) Exchange +(125) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Scan parquet +(126) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(120) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(121) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(122) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(124) Project +(133) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(125) HashAggregate +(134) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(126) Exchange +(135) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) HashAggregate +(136) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(128) TakeOrderedAndProject +(137) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(129) AdaptiveSparkPlan +(138) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt index f336a73676ea..984abd470378 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,51 +212,59 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt index f188fa96b0d8..58484edaa685 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt index 42a8fef3563f..cb7a3c3a0955 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt index 378085655899..930a5a0bf488 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt index a7054770a17e..d9eb23cb737e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt index cdede8445908..5c9e51b95c60 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ NoopFilter (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6), true) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt index 11a02d0a54d2..2abb6ec215c6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4), true) as decimal(27,4)))), DecimalType(27,4), true) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt index 5a74265ab590..3be5f1996fa8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ NoopFilter (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt index 8d17beb8c0a9..2347eb9b677e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,359 +315,395 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ NoopFilter (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ NoopFilter (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) NoopFilter +(84) NoopFilter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt index dd1259eb8876..b0f084e2d048 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt index d43ad2a9c271..07c32ff95fb1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ NoopFilter (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt index cb3ddbb3a2f7..11bbb2a71e79 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt index 60521d6c62cd..be97f58cf438 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -186,221 +188,229 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ NoopFilter (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ NoopFilter (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) NoopFilter +(49) NoopFilter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt index 029516a40506..86d2f321f653 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt index e2d1503799a9..6a2e47576cad 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ NoopFilter (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt index a0e052432bb3..7fe13a003017 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt index 440383aa1cd7..34abb726b85a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt index d3cff30e0ce2..2cf50b2a3a98 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ NoopFilter (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -507,176 +517,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt index bd77a7f7f043..f92684b17b15 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ NoopFilter (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -496,163 +505,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt index 7cf55b4c0f2d..1c2790a4a999 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,170 +212,178 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ NoopFilter (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ NoopFilter (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) NoopFilter +(54) NoopFilter Input [2]: [c_phone#X, c_acctbal#X] Arguments: [c_phone#X, c_acctbal#X] -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt index 8b1f048c7d6e..1f9905294144 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt index 1b680584826d..130bc2983040 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt index 67159dbb648a..10ce074fd760 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt index 71742ea423b5..651cfa840be8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt index 319e6c9f1b21..e359f4c944e7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ NoopFilter (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6)) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt index 40dee1752399..21c91ca14180 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4)) as decimal(27,4)))), DecimalType(27,4)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt index 94e1100ea37d..5e48ceb742d7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ NoopFilter (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -374,119 +380,143 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt index 41d23099319f..6c9eef6b1f1f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -313,365 +317,401 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ NoopFilter (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ NoopFilter (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) NoopFilter +(84) NoopFilter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt index 9995164f4c49..8f963c49aba8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -219,61 +221,69 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt index 53801198fb49..9584f92628cd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ NoopFilter (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -230,75 +232,83 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt index 4080a469e711..175a0e5a97cc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -163,45 +165,53 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt index a177fe4bcaec..130d9036b4a2 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -187,222 +189,230 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join type: Inner Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ NoopFilter (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ NoopFilter (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) NoopFilter +(49) NoopFilter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt index 89a647ffce45..32a24beb94b9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -298,75 +300,83 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt index 42fc32b0bce1..59baa2d7a08c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ NoopFilter (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -249,92 +252,104 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt index ca913b2d4c84..c78e265e54d9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -405,158 +412,186 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt index 91187ac8d5a7..a9c629524fb7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -158,45 +160,53 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt index 01ab88ee0b2b..8e929ff7b296 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ NoopFilter (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -512,181 +522,221 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt index 317740080d7b..279f4f096692 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ NoopFilter (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -501,168 +510,204 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftAnti Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt index 0d779c9160cf..2b93055014bd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -211,171 +213,179 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftAnti Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ NoopFilter (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ NoopFilter (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) NoopFilter +(54) NoopFilter Input [2]: [c_phone#X, c_acctbal#X] Arguments: [c_phone#X, c_acctbal#X] -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt index 8c671a61c9f7..aa679861da7c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -258,82 +262,98 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt index 3d145f0c3bb8..02c494288f95 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -224,61 +226,69 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt index 08e655f5aa81..67150984ab61 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -572,181 +582,221 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt index 71ade94be21d..65dfab993c3c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -542,173 +552,213 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt index ddeab25c4569..61f6287c2429 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ NoopFilter (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -757,235 +771,291 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt index 634e3516a710..4b983de23fde 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -575,173 +585,213 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt index c5fcd91867cb..993884df3f3a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ FilterExecTransformer (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt index 59de06707aad..8142375d9ead 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,92 +315,108 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt index a8ac5d0d2c1b..802a79759235 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt index d65867ecf822..b9bf0f1fad60 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt index 2bc0be8fcb67..425c55f5a4ce 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt index 0d21930825c7..f003eed994d0 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (48) +AdaptiveSparkPlan (50) +- == Final Plan == VeloxColumnarToRowExec (33) +- ^ SortExecTransformer (31) @@ -28,20 +28,22 @@ AdaptiveSparkPlan (48) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (47) - +- Exchange (46) - +- Project (45) - +- ShuffledHashJoin Inner BuildLeft (44) - :- Exchange (36) - : +- Filter (35) - : +- Scan parquet (34) - +- Filter (43) - +- HashAggregate (42) - +- Exchange (41) - +- HashAggregate (40) - +- Project (39) - +- Filter (38) - +- Scan parquet (37) + Sort (49) + +- Exchange (48) + +- Project (47) + +- SortMergeJoin Inner (46) + :- Sort (37) + : +- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Sort (45) + +- Filter (44) + +- HashAggregate (43) + +- Exchange (42) + +- HashAggregate (41) + +- Project (40) + +- Filter (39) + +- Scan parquet (38) (1) Scan parquet @@ -197,60 +199,68 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(37) Scan parquet +(37) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(38) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(38) Filter +(39) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(39) Project +(40) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(40) HashAggregate +(41) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(41) Exchange +(42) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) HashAggregate +(43) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS total_revenue#X] -(43) Filter +(44) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(44) ShuffledHashJoin +(45) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(45) Project +(47) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(46) Exchange +(48) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) Sort +(49) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(48) AdaptiveSparkPlan +(50) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt index cd3e53ad7bd6..c9374b01ff02 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt index fc17c87d7df0..69f50fa16ab0 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ FilterExecTransformer (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt index fc65f4b52897..96fa1cd82606 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt index d3d74c5ba792..ae0feb5dfd56 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt index df1ae98f903e..bab785551636 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (146) +- == Final Plan == VeloxColumnarToRowExec (96) +- ^ SortExecTransformer (94) @@ -76,45 +76,55 @@ AdaptiveSparkPlan (136) +- ^ FilterExecTransformer (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (135) - +- Exchange (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (127) - : +- Project (126) - : +- ShuffledHashJoin LeftSemi BuildRight (125) - : :- Exchange (99) - : : +- Filter (98) - : : +- Scan parquet (97) - : +- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin Inner BuildLeft (122) - : :- Exchange (108) - : : +- ShuffledHashJoin LeftSemi BuildRight (107) - : : :- Exchange (102) - : : : +- Filter (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (121) - : +- Filter (120) - : +- HashAggregate (119) - : +- HashAggregate (118) - : +- ShuffledHashJoin LeftSemi BuildRight (117) - : :- Exchange (112) - : : +- Project (111) - : : +- Filter (110) - : : +- Scan parquet (109) - : +- Exchange (116) - : +- Project (115) - : +- Filter (114) - : +- Scan parquet (113) - +- Exchange (131) - +- Project (130) - +- Filter (129) - +- Scan parquet (128) + Sort (145) + +- Exchange (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (136) + : +- Exchange (135) + : +- Project (134) + : +- SortMergeJoin LeftSemi (133) + : :- Sort (100) + : : +- Exchange (99) + : : +- Filter (98) + : : +- Scan parquet (97) + : +- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin Inner (129) + : :- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftSemi (110) + : : :- Sort (104) + : : : +- Exchange (103) + : : : +- Filter (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (128) + : +- Exchange (127) + : +- Filter (126) + : +- HashAggregate (125) + : +- HashAggregate (124) + : +- SortMergeJoin LeftSemi (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- Filter (114) + : : +- Scan parquet (113) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (141) + +- Exchange (140) + +- Project (139) + +- Filter (138) + +- Scan parquet (137) (1) Scan parquet @@ -518,176 +528,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(100) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(101) Filter +(102) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(102) Exchange +(103) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) Scan parquet +(104) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(105) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(104) Filter +(106) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(105) Project +(107) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(106) Exchange +(108) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(109) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) Exchange +(111) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Scan parquet +(112) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(110) Filter +(114) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(111) Project +(115) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(112) Exchange +(116) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(113) Scan parquet +(117) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(114) Filter +(119) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(115) Project +(120) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(116) Exchange +(121) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(118) HashAggregate +(124) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(119) HashAggregate +(125) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(120) Filter +(126) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(121) Exchange +(127) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(128) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(129) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(123) Project +(130) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(124) Exchange +(131) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) ShuffledHashJoin +(132) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(126) Project +(134) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(127) Exchange +(135) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(128) Scan parquet +(136) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(137) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(129) Filter +(138) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(130) Project +(139) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(131) Exchange +(140) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(143) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(134) Exchange +(144) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Sort +(145) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(136) AdaptiveSparkPlan +(146) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt index c0f3602f9fe6..ef4e87bb1de4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (129) +AdaptiveSparkPlan (138) +- == Final Plan == VeloxColumnarToRowExec (92) +- TakeOrderedAndProjectExecTransformer (91) @@ -73,42 +73,51 @@ AdaptiveSparkPlan (129) +- ^ FilterExecTransformer (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (128) - +- HashAggregate (127) - +- Exchange (126) - +- HashAggregate (125) - +- Project (124) - +- ShuffledHashJoin Inner BuildRight (123) - :- Exchange (118) - : +- Project (117) - : +- ShuffledHashJoin Inner BuildRight (116) - : :- Exchange (111) - : : +- Project (110) - : : +- ShuffledHashJoin Inner BuildLeft (109) - : : :- Exchange (95) - : : : +- Filter (94) - : : : +- Scan parquet (93) - : : +- Exchange (108) - : : +- ShuffledHashJoin LeftAnti BuildRight (107) - : : :- ShuffledHashJoin LeftSemi BuildRight (102) - : : : :- Exchange (99) - : : : : +- Project (98) - : : : : +- Filter (97) - : : : : +- Scan parquet (96) - : : : +- Exchange (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (115) - : +- Project (114) - : +- Filter (113) - : +- Scan parquet (112) - +- Exchange (122) - +- Project (121) - +- Filter (120) - +- Scan parquet (119) + TakeOrderedAndProject (137) + +- HashAggregate (136) + +- Exchange (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- SortMergeJoin Inner (114) + : : :- Sort (96) + : : : +- Exchange (95) + : : : +- Filter (94) + : : : +- Scan parquet (93) + : : +- Sort (113) + : : +- Exchange (112) + : : +- SortMergeJoin LeftAnti (111) + : : :- SortMergeJoin LeftSemi (105) + : : : :- Sort (101) + : : : : +- Exchange (100) + : : : : +- Project (99) + : : : : +- Filter (98) + : : : : +- Scan parquet (97) + : : : +- Sort (104) + : : : +- Exchange (103) + : : : +- Scan parquet (102) + : : +- Sort (110) + : : +- Exchange (109) + : : +- Project (108) + : : +- Filter (107) + : : +- Scan parquet (106) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) (1) Scan parquet @@ -501,163 +510,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Scan parquet +(96) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(97) Filter +(98) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(98) Project +(99) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(99) Exchange +(100) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(101) Exchange +(103) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(104) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(105) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(103) Scan parquet +(106) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(104) Filter +(107) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(105) Project +(108) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(106) Exchange +(109) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(110) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(111) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(108) Exchange +(112) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(113) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(114) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(110) Project +(115) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(111) Exchange +(116) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(117) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(113) Filter +(119) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(114) Project +(120) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(115) Exchange +(121) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) ShuffledHashJoin +(122) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(117) Project +(124) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(118) Exchange +(125) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Scan parquet +(126) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(120) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(121) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(122) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(124) Project +(133) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(125) HashAggregate +(134) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(126) Exchange +(135) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) HashAggregate +(136) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(128) TakeOrderedAndProject +(137) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(129) AdaptiveSparkPlan +(138) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt index 5ab0811e658f..fcf712a9d5fd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,51 +212,59 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt index c51701bd0840..607d6444f432 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt index 1b95ae3dbf39..cc6b8f351600 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt index c31fbccc1e59..a1f95887aae3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt index 06b84fdca2c7..64d51413a084 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt index e9fdc420f128..8934d1a2e7a3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ FilterExecTransformer (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6), true) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt index d6bc308a9c2a..cb207c0800c3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4), true) as decimal(27,4)))), DecimalType(27,4), true) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt index fcb13291c838..c295515b8a6c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ FilterExecTransformer (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt index bbd20320b798..20bb486f3841 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,359 +315,395 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ FilterExecTransformer (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) FilterExecTransformer +(84) FilterExecTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: isnotnull(ps_suppkey#X) -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt index 194b60bb7713..1b36d274aab4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt index a9d4e199cfd2..83ec9aeda98a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt index afac58cb52bc..a1108606b5bb 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt index fec017400c11..88730deb3c32 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -186,221 +188,229 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ FilterExecTransformer (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) FilterExecTransformer +(49) FilterExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt index 15dd2fa6da8e..535b6940301d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt index 69af0fd38e92..d360b6c948e3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ FilterExecTransformer (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt index 8db05ed7572c..a664adfd3175 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt index 14a5515d1e79..58e80362020f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt index 7b840720bc90..c22b822e6f7d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ FilterExecTransformer (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -507,176 +517,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt index 5c05ec24757e..8413e2f8f232 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ FilterExecTransformer (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -496,163 +505,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt index af5c086c274a..214b34066a8f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,191 +212,199 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) FilterExecTransformer +(54) FilterExecTransformer Input [2]: [c_phone#X, c_acctbal#X] Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) \ No newline at end of file + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt index 51408f03e4e7..df17819cafe9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt index 4da32d7f70ac..85d303df874f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt index 2669a9fce3ae..8978f9563c68 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt index b5abf7e36164..244f650f3a72 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt index 47886e292bf7..282790ba6507 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ FilterExecTransformer (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6)) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt index 0bad5c20cf05..15fbf97a77f3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4)) as decimal(27,4)))), DecimalType(27,4)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt index 5be72ee42483..d7376c740f93 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ FilterExecTransformer (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -374,119 +380,143 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt index d9dbbfe0dbe9..c9371ffbf2c3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -313,365 +317,401 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ FilterExecTransformer (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) FilterExecTransformer +(84) FilterExecTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: isnotnull(ps_suppkey#X) -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt index 63c356d6d1bf..ce033f5468d1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -219,61 +221,69 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt index 812a7be868b6..c71d03b93e12 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -230,75 +232,83 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt index c6f425f00868..492d3f8b9d07 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -163,45 +165,53 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt index e30eec3d854f..129e4ad927e9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -187,222 +189,230 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join type: Inner Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ FilterExecTransformer (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) FilterExecTransformer +(49) FilterExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt index eff7577281e6..45b6041f8b4b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -298,75 +300,83 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt index 649bfcbe40e1..b46b3e3f2724 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ FilterExecTransformer (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -249,92 +252,104 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt index c3075e511782..febb48962446 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -405,158 +412,186 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt index baf4b2a51607..fa78645313e4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -158,45 +160,53 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt index 7ddecfe855eb..bb9987fc32c1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ FilterExecTransformer (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -512,181 +522,221 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt index 7b8c173fc086..5e8c9ad9f92a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ FilterExecTransformer (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -501,168 +510,204 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftAnti Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt index d6ec93a97fc6..50f1c1bdef30 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -211,192 +213,200 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftAnti Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) FilterExecTransformer +(54) FilterExecTransformer Input [2]: [c_phone#X, c_acctbal#X] Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) \ No newline at end of file + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt index 709a1700b5c7..50ad3b59c347 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -258,82 +262,98 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt index a82dbf288086..2db46753e9fc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -224,61 +226,69 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt index f20a52b91a8f..07a5c86709f4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -572,181 +582,221 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt index 710c6f3ba189..b27398e415d3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -542,173 +552,213 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt index 953a3a8f0a7c..fa2a2789f4d3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ FilterExecTransformer (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -757,235 +771,291 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt index 492ff1aeadd0..3000cbae7a6d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -575,173 +585,213 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala index 27d191b9ee05..2b40ac54b2c6 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.execution.{ColumnarShuffleExchangeExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, AQEShuffleReadExec} import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.joins.SortMergeJoinExec class FallbackSuite extends VeloxWholeStageTransformerSuite with AdaptiveSparkPlanHelper { protected val rootPath: String = getClass.getResource("/").getPath @@ -240,4 +241,26 @@ class FallbackSuite extends VeloxWholeStageTransformerSuite with AdaptiveSparkPl } } } + + test("fallback with smj") { + val sql = "SELECT /*+ SHUFFLE_MERGE(tmp1) */ * FROM tmp1 join tmp2 on tmp1.c1 = tmp2.c1" + withSQLConf( + GlutenConfig.COLUMNAR_FPRCE_SHUFFLED_HASH_JOIN_ENABLED.key -> "true", + GlutenConfig.COLUMNAR_SHUFFLED_HASH_JOIN_ENABLED.key -> "false") { + runQueryAndCompare(sql) { + df => + val plan = df.queryExecution.executedPlan + assert(collect(plan) { case smj: SortMergeJoinExec => smj }.size == 1) + } + } + withSQLConf( + GlutenConfig.COLUMNAR_FPRCE_SHUFFLED_HASH_JOIN_ENABLED.key -> "false", + GlutenConfig.COLUMNAR_SORTMERGEJOIN_ENABLED.key -> "false") { + runQueryAndCompare(sql) { + df => + val plan = df.queryExecution.executedPlan + assert(collect(plan) { case smj: SortMergeJoinExec => smj }.size == 1) + } + } + } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala index 2860e3ced072..0f397c69263c 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala @@ -22,7 +22,7 @@ import org.apache.gluten.backendsapi.BackendsApiManager import org.apache.gluten.events.GlutenBuildInfoEvent import org.apache.gluten.exception.GlutenException import org.apache.gluten.expression.ExpressionMappings -import org.apache.gluten.extension.{ColumnarOverrides, OthersExtensionOverrides, QueryStagePrepOverrides, StrategyOverrides} +import org.apache.gluten.extension.{ColumnarOverrides, OthersExtensionOverrides, QueryStagePrepOverrides} import org.apache.gluten.test.TestStats import org.apache.gluten.utils.TaskListener @@ -312,7 +312,6 @@ private[gluten] object GlutenPlugin { val DEFAULT_INJECTORS: List[GlutenSparkExtensionsInjector] = List( QueryStagePrepOverrides, ColumnarOverrides, - StrategyOverrides, OthersExtensionOverrides ) } diff --git a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala index 50292839b684..d159486373ac 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala @@ -86,7 +86,6 @@ trait BackendSettingsApi { * the result columns from the shuffle. */ def supportShuffleWithProject(outputPartitioning: Partitioning, child: SparkPlan): Boolean = false - def utilizeShuffledHashJoinHint(): Boolean = false def excludeScanExecFromCollapsedStage(): Boolean = false def rescaleDecimalArithmetic: Boolean = false diff --git a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/SortUtils.scala similarity index 53% rename from shims/spark32/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala rename to gluten-core/src/main/scala/org/apache/gluten/execution/SortUtils.scala index 5849dd600b7d..2c0ad1b0a59a 100644 --- a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/SortUtils.scala @@ -14,27 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.sql.execution +package org.apache.gluten.execution -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} +import org.apache.gluten.extension.columnar.rewrite.RewrittenNodeWall -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join) - } +import org.apache.spark.sql.execution.{ProjectExec, SortExec, SparkPlan} + +object SortUtils { + def dropPartialSort(plan: SparkPlan): SparkPlan = plan match { + case RewrittenNodeWall(p) => RewrittenNodeWall(dropPartialSort(p)) + case sort: SortExec if !sort.global => sort.child + // from pre/post project-pulling + case ProjectExec(_, SortExec(_, false, ProjectExec(_, p), _)) + if plan.outputSet == p.outputSet => + p + case _ => plan } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala deleted file mode 100644 index f2f786259393..000000000000 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.gluten.extension - -import org.apache.gluten.{GlutenConfig, GlutenSparkExtensionsInjector} -import org.apache.gluten.backendsapi.BackendsApiManager -import org.apache.gluten.extension.columnar.TRANSFORM_UNSUPPORTED -import org.apache.gluten.extension.columnar.TransformHints.TAG -import org.apache.gluten.utils.LogicalPlanSelector - -import org.apache.spark.sql.{SparkSession, SparkSessionExtensions, Strategy} -import org.apache.spark.sql.catalyst.SQLConfHelper -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, JoinSelectionHelper} -import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.execution.{joins, JoinSelectionShim, SparkPlan} -import org.apache.spark.sql.execution.adaptive.{BroadcastQueryStageExec, LogicalQueryStage} -import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec - -object StrategyOverrides extends GlutenSparkExtensionsInjector { - override def inject(extensions: SparkSessionExtensions): Unit = { - extensions.injectPlannerStrategy(JoinSelectionOverrides) - } -} - -case class JoinSelectionOverrides(session: SparkSession) - extends Strategy - with JoinSelectionHelper - with SQLConfHelper { - - private def isBroadcastStage(plan: LogicalPlan): Boolean = plan match { - case LogicalQueryStage(_, _: BroadcastQueryStageExec) => true - case _ => false - } - - def extractEqualJoinKeyCondition( - joinType: JoinType, - leftKeys: Seq[Expression], - rightKeys: Seq[Expression], - condition: Option[Expression], - left: LogicalPlan, - right: LogicalPlan, - hint: JoinHint, - forceShuffledHashJoin: Boolean): Seq[SparkPlan] = { - if (isBroadcastStage(left) || isBroadcastStage(right)) { - val buildSide = if (isBroadcastStage(left)) BuildLeft else BuildRight - Seq( - BroadcastHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide, - condition, - planLater(left), - planLater(right))) - } else { - // Generate BHJ here, avoid to do match in `JoinSelection` again. - val isHintEmpty = hint.leftHint.isEmpty && hint.rightHint.isEmpty - val buildSide = getBroadcastBuildSide(left, right, joinType, hint, !isHintEmpty, conf) - if (buildSide.isDefined) { - return Seq( - joins.BroadcastHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide.get, - condition, - planLater(left), - planLater(right))) - } - - if ( - forceShuffledHashJoin && - !BackendsApiManager.getSparkPlanExecApiInstance.joinFallback( - joinType, - left.outputSet, - right.outputSet, - condition) && - !left.getTagValue(TAG).isDefined && - !right.getTagValue(TAG).isDefined - ) { - // Force use of ShuffledHashJoin in preference to SortMergeJoin. With no respect to - // conf setting "spark.sql.join.preferSortMergeJoin". - val (leftBuildable, rightBuildable) = - if (BackendsApiManager.getSettings.utilizeShuffledHashJoinHint()) { - // Currently, ClickHouse backend can not support AQE, so it needs to use join hint - // to decide the build side, after supporting AQE, will remove this. - val leftHintEnabled = hintToShuffleHashJoinLeft(hint) - val rightHintEnabled = hintToShuffleHashJoinRight(hint) - val leftHintMergeEnabled = hint.leftHint.exists(_.strategy.contains(SHUFFLE_MERGE)) - val rightHintMergeEnabled = hint.rightHint.exists(_.strategy.contains(SHUFFLE_MERGE)) - if (leftHintEnabled || rightHintEnabled) { - (leftHintEnabled, rightHintEnabled) - } else if (leftHintMergeEnabled || rightHintMergeEnabled) { - // hack: when set SHUFFLE_MERGE hint, it means that - // it don't use this side as the build side - (!leftHintMergeEnabled, !rightHintMergeEnabled) - } else { - ( - BackendsApiManager.getSettings.supportHashBuildJoinTypeOnLeft(joinType), - BackendsApiManager.getSettings.supportHashBuildJoinTypeOnRight(joinType)) - } - } else { - (canBuildShuffledHashJoinLeft(joinType), canBuildShuffledHashJoinRight(joinType)) - } - - if (!leftBuildable && !rightBuildable) { - return Nil - } - val buildSide = if (!leftBuildable) { - BuildRight - } else if (!rightBuildable) { - BuildLeft - } else { - getSmallerSide(left, right) - } - - return Option(buildSide) - .map { - buildSide => - Seq( - joins.ShuffledHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide, - condition, - planLater(left), - planLater(right))) - } - .getOrElse(Nil) - } - Nil - } - } - - def existsMultiJoins(plan: LogicalPlan, count: Int = 0): Boolean = { - plan match { - case plan: Join => - if ((count + 1) >= GlutenConfig.getConf.logicalJoinOptimizationThrottle) return true - plan.children.exists(existsMultiJoins(_, count + 1)) - case plan: Project => - if ((count + 1) >= GlutenConfig.getConf.logicalJoinOptimizationThrottle) return true - plan.children.exists(existsMultiJoins(_, count + 1)) - case other => false - } - } - - def tagNotTransformable(plan: LogicalPlan, reason: String): LogicalPlan = { - plan.setTagValue(TAG, TRANSFORM_UNSUPPORTED(Some(reason))) - plan - } - - def tagNotTransformableRecursive(plan: LogicalPlan, reason: String): LogicalPlan = { - tagNotTransformable( - plan.withNewChildren(plan.children.map(tagNotTransformableRecursive(_, reason))), - reason) - } - - def existLeftOuterJoin(plan: LogicalPlan): Boolean = { - plan.collect { - case join: Join if join.joinType.sql.equals("LEFT OUTER") => - return true - }.size > 0 - } - - override def apply(plan: LogicalPlan): Seq[SparkPlan] = - LogicalPlanSelector.maybeNil(session, plan) { - // Ignore forceShuffledHashJoin if exist multi continuous joins - if ( - GlutenConfig.getConf.enableLogicalJoinOptimize && - existsMultiJoins(plan) && existLeftOuterJoin(plan) - ) { - tagNotTransformableRecursive(plan, "exist multi continuous joins") - } - plan match { - // If the build side of BHJ is already decided by AQE, we need to keep the build side. - case JoinSelectionShim.ExtractEquiJoinKeysShim( - joinType, - leftKeys, - rightKeys, - condition, - left, - right, - hint) => - extractEqualJoinKeyCondition( - joinType, - leftKeys, - rightKeys, - condition, - left, - right, - hint, - GlutenConfig.getConf.forceShuffledHashJoin) - case _ => Nil - } - } -} diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala index 11db0bc1faf1..1f6f840b5552 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala @@ -27,6 +27,8 @@ import org.apache.gluten.utils.{LogLevelUtil, PlanUtil} import org.apache.spark.api.python.EvalPythonExecTransformer import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, NamedExpression} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.datasources.WriteFilesExec @@ -136,7 +138,7 @@ case class OffloadJoin() extends OffloadSingleNode with LogLevelUtil { plan.leftKeys, plan.rightKeys, plan.joinType, - TransformHints.getShuffleHashJoinBuildSide(plan), + OffloadJoin.getBuildSide(plan), plan.condition, left, right, @@ -187,6 +189,31 @@ case class OffloadJoin() extends OffloadSingleNode with LogLevelUtil { } } +object OffloadJoin { + + def getBuildSide(shj: ShuffledHashJoinExec): BuildSide = { + val leftBuildable = + BackendsApiManager.getSettings.supportHashBuildJoinTypeOnLeft(shj.joinType) + val rightBuildable = + BackendsApiManager.getSettings.supportHashBuildJoinTypeOnRight(shj.joinType) + if (!leftBuildable) { + BuildRight + } else if (!rightBuildable) { + BuildLeft + } else { + shj.logicalLink match { + case Some(join: Join) => + val leftSize = join.left.stats.sizeInBytes + val rightSize = join.right.stats.sizeInBytes + if (rightSize <= leftSize) BuildRight else BuildLeft + // Only the ShuffledHashJoinExec generated directly in some spark tests is not link + // logical plan, such as OuterJoinSuite. + case _ => shj.buildSide + } + } + } +} + case class OffloadProject() extends OffloadSingleNode with LogLevelUtil { private def containsInputFileRelatedExpr(expr: Expression): Boolean = { expr match { diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala index aa7aab759ef8..7fb451057a2e 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala @@ -29,8 +29,6 @@ import org.apache.spark.api.python.EvalPythonExecTransformer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} -import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ @@ -154,33 +152,6 @@ object TransformHints { tag(plan, newTag) } } - - def getShuffleHashJoinBuildSide(shj: ShuffledHashJoinExec): BuildSide = { - if (BackendsApiManager.getSettings.utilizeShuffledHashJoinHint()) { - shj.buildSide - } else { - val leftBuildable = BackendsApiManager.getSettings - .supportHashBuildJoinTypeOnLeft(shj.joinType) - val rightBuildable = BackendsApiManager.getSettings - .supportHashBuildJoinTypeOnRight(shj.joinType) - - if (!leftBuildable) { - BuildRight - } else if (!rightBuildable) { - BuildLeft - } else { - shj.logicalLink match { - case Some(join: Join) => - val leftSize = join.left.stats.sizeInBytes - val rightSize = join.right.stats.sizeInBytes - if (rightSize <= leftSize) BuildRight else BuildLeft - // Only the ShuffledHashJoinExec generated directly in some spark tests is not link - // logical plan, such as OuterJoinSuite. - case _ => shj.buildSide - } - } - } - } } case class FallbackOnANSIMode(session: SparkSession) extends Rule[SparkPlan] { @@ -205,6 +176,9 @@ case class FallbackMultiCodegens(session: SparkSession) extends Rule[SparkPlan] case plan: ShuffledHashJoinExec => if ((count + 1) >= optimizeLevel) return true plan.children.exists(existsMultiCodegens(_, count + 1)) + case plan: SortMergeJoinExec if GlutenConfig.getConf.forceShuffledHashJoin => + if ((count + 1) >= optimizeLevel) return true + plan.children.exists(existsMultiCodegens(_, count + 1)) case other => false } @@ -415,7 +389,7 @@ case class AddTransformHintRule() extends Rule[SparkPlan] { plan.leftKeys, plan.rightKeys, plan.joinType, - TransformHints.getShuffleHashJoinBuildSide(plan), + OffloadJoin.getBuildSide(plan), plan.condition, plan.left, plan.right, diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala new file mode 100644 index 000000000000..e038f5af0a07 --- /dev/null +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.extension.columnar.rewrite + +import org.apache.gluten.GlutenConfig +import org.apache.gluten.execution.SortUtils + +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide, JoinSelectionHelper} +import org.apache.spark.sql.catalyst.plans.JoinType +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} + +/** + * If force ShuffledHashJoin, convert [[SortMergeJoinExec]] to [[ShuffledHashJoinExec]]. There is no + * need to select a smaller table as buildSide here, it will be reselected when offloading. + */ +object RewriteJoin extends RewriteSingleNode with JoinSelectionHelper { + + private def getBuildSide(joinType: JoinType): Option[BuildSide] = { + val leftBuildable = canBuildShuffledHashJoinLeft(joinType) + val rightBuildable = canBuildShuffledHashJoinRight(joinType) + if (rightBuildable) { + Some(BuildRight) + } else if (leftBuildable) { + Some(BuildLeft) + } else { + None + } + } + + override def rewrite(plan: SparkPlan): SparkPlan = plan match { + case smj: SortMergeJoinExec if GlutenConfig.getConf.forceShuffledHashJoin => + getBuildSide(smj.joinType) match { + case Some(buildSide) => + ShuffledHashJoinExec( + smj.leftKeys, + smj.rightKeys, + smj.joinType, + buildSide, + smj.condition, + SortUtils.dropPartialSort(smj.left), + SortUtils.dropPartialSort(smj.right), + smj.isSkewJoin + ) + case _ => plan + } + case _ => plan + } +} diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala index 01f2e29fe62d..551cfd599abd 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala @@ -35,6 +35,6 @@ trait RewriteSingleNode { object RewriteSingleNode { def allRules(): Seq[RewriteSingleNode] = { - Seq(RewriteIn, RewriteMultiChildrenCount, PullOutPreProject, PullOutPostProject) + Seq(RewriteIn, RewriteMultiChildrenCount, RewriteJoin, PullOutPreProject, PullOutPostProject) } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala index 8706e5618f6b..34fe34f3f3fa 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala @@ -67,12 +67,11 @@ class RewriteSparkPlanRulesManager private (rewriteRules: Seq[RewriteSingleNode] } } - private def getTransformHintBack( - origin: SparkPlan, - rewrittenPlan: SparkPlan): Option[TransformHint] = { - // The rewritten plan may contain more nodes than origin, here use the node name to get it back + private def getTransformHintBack(rewrittenPlan: SparkPlan): Option[TransformHint] = { + // The rewritten plan may contain more nodes than origin, for now it should only be + // `ProjectExec`. val target = rewrittenPlan.collect { - case p if p.nodeName == origin.nodeName => p + case p if !p.isInstanceOf[ProjectExec] && !p.isInstanceOf[RewrittenNodeWall] => p } assert(target.size == 1) TransformHints.getHintOption(target.head) @@ -113,7 +112,7 @@ class RewriteSparkPlanRulesManager private (rewriteRules: Seq[RewriteSingleNode] origin } else { addHint.apply(rewrittenPlan) - val hint = getTransformHintBack(origin, rewrittenPlan) + val hint = getTransformHintBack(rewrittenPlan) if (hint.isDefined) { // If the rewritten plan is still not transformable, return the original plan. TransformHints.tag(origin, hint.get) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 5df53953e4cc..a17f72de3121 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -331,7 +331,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") .exclude("broadcast join where streamed side's output partitioning is PartitioningCollection") enableSuite[GlutenSQLQuerySuite] diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala index c9ccc1afc75d..f418ec06645c 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala @@ -22,7 +22,6 @@ import org.apache.gluten.utils.{BackendTestUtils, SystemParameters} import org.apache.spark.sql.{GlutenTestsCommonTrait, SparkSession} import org.apache.spark.sql.catalyst.optimizer._ -import org.apache.spark.sql.execution.exchange.EnsureRequirements import org.apache.spark.sql.functions.broadcast import org.apache.spark.sql.internal.SQLConf @@ -41,8 +40,6 @@ class GlutenBroadcastJoinSuite extends BroadcastJoinSuite with GlutenTestsCommon * Create a new [[SparkSession]] running in local-cluster mode with unsafe and codegen enabled. */ - private val EnsureRequirements = new EnsureRequirements() - private val isVeloxBackend = BackendTestUtils.isVeloxBackendLoaded() // BroadcastHashJoinExecTransformer is not case class, can't call toString method, @@ -235,22 +232,6 @@ class GlutenBroadcastJoinSuite extends BroadcastJoinSuite with GlutenTestsCommon } } - testGluten("broadcast hint isn't propagated after a join") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { - val df1 = Seq((1, "4"), (2, "2")).toDF("key", "value") - val df2 = Seq((1, "1"), (2, "2")).toDF("key", "value") - val df3 = df1.join(broadcast(df2), Seq("key"), "inner").drop(df2("key")) - - val df4 = Seq((1, "5"), (2, "5")).toDF("key", "value") - val df5 = df4.join(df3, Seq("key"), "inner") - - val plan = EnsureRequirements.apply(df5.queryExecution.sparkPlan) - - assert(plan.collect { case p: BroadcastHashJoinExec => p }.size === 1) - assert(plan.collect { case p: ShuffledHashJoinExec => p }.size === 1) - } - } - private def assertJoinBuildSide(sqlStr: String, joinMethod: String, buildSide: BuildSide): Any = { val executedPlan = stripAQEPlan(sql(sqlStr).queryExecution.executedPlan) executedPlan match { diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 8418cba237e3..ae3e7c7b8e9d 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -865,7 +865,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") enableSuite[GlutenExistenceJoinSuite] diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 026e2dde0055..92e6fee97ea9 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -85,11 +85,11 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq("COUNT", "COLLECT_LIST").foreach { - aggExpr => + Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + aggExprInfo => val query = s""" - |SELECT key, $aggExpr(key) + |SELECT key, ${aggExprInfo._1}(key) |FROM |( | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key @@ -102,7 +102,7 @@ class GlutenReplaceHashWithSortAggSuite if (BackendsApiManager.getSettings.mergeTwoPhasesHashBaseAggregateIfNeed()) { checkAggs(query, 1, 0, 1, 0) } else { - checkAggs(query, 2, 0, 2, 0) + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) } } } diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 91127c4ba9bb..0da19922ffda 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -857,7 +857,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") .exclude("broadcast join where streamed side's output partitioning is PartitioningCollection") @@ -1119,9 +1118,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenJoinSuite] // exclude as it check spark plan .exclude("SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join") - // exclude as it check for SMJ node - .exclude( - "SPARK-43113: Full outer join with duplicate stream-side references in condition (SMJ)") enableSuite[GlutenMathFunctionsSuite] enableSuite[GlutenMetadataCacheSuite] .exclude("SPARK-16336,SPARK-27961 Suggest fixing FileNotFoundException") diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala index 4ac8bd3ea8bf..8a5a5923f729 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala @@ -16,9 +16,6 @@ */ package org.apache.spark.sql -import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec - class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { override def testNameBlackList: Seq[String] = Seq( @@ -55,14 +52,4 @@ class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { |""".stripMargin checkAnswer(spark.sql(sql), Seq(Row(0, 1), Row(1, 2), Row(2, 3))) } - - testGluten( - "SPARK-43113: Full outer join with duplicate stream-side" + - " references in condition (SHJ)") { - def check(plan: SparkPlan): Unit = { - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - } - - dupStreamSideColTest("MERGE", check) - } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 8d795bbffea0..332c21418a9b 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -84,11 +84,11 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq("COUNT", "COLLECT_LIST").foreach { - aggExpr => + Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + aggExprInfo => val query = s""" - |SELECT key, $aggExpr(key) + |SELECT key, ${aggExprInfo._1}(key) |FROM |( | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key @@ -98,7 +98,7 @@ class GlutenReplaceHashWithSortAggSuite |) |GROUP BY key """.stripMargin - checkAggs(query, 2, 0, 2, 0) + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) } } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 6162b5651980..e54aca34ec75 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -866,7 +866,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") enableSuite[GlutenExistenceJoinSuite] @@ -1136,9 +1135,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenJoinSuite] // exclude as it check spark plan .exclude("SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join") - // exclude as it check for SMJ node - .exclude( - "SPARK-43113: Full outer join with duplicate stream-side references in condition (SMJ)") enableSuite[GlutenMathFunctionsSuite] enableSuite[GlutenMetadataCacheSuite] .exclude("SPARK-16336,SPARK-27961 Suggest fixing FileNotFoundException") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala index 09718fb1a439..5ef4056201ed 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala @@ -16,9 +16,6 @@ */ package org.apache.spark.sql -import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec - class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { override def testNameBlackList: Seq[String] = Seq( @@ -57,14 +54,4 @@ class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { |""".stripMargin checkAnswer(spark.sql(sql), Seq(Row(0, 1), Row(1, 2), Row(2, 3))) } - - testGluten( - "SPARK-43113: Full outer join with duplicate stream-side" + - " references in condition (SHJ)") { - def check(plan: SparkPlan): Unit = { - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - } - - dupStreamSideColTest("MERGE", check) - } } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 8d795bbffea0..332c21418a9b 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -84,11 +84,11 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq("COUNT", "COLLECT_LIST").foreach { - aggExpr => + Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + aggExprInfo => val query = s""" - |SELECT key, $aggExpr(key) + |SELECT key, ${aggExprInfo._1}(key) |FROM |( | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key @@ -98,7 +98,7 @@ class GlutenReplaceHashWithSortAggSuite |) |GROUP BY key """.stripMargin - checkAggs(query, 2, 0, 2, 0) + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) } } } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index ec80ba86a7b9..4ef96bec27eb 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -153,9 +153,6 @@ class GlutenConfig(conf: SQLConf) extends Logging { def logicalJoinOptimizationThrottle: Integer = conf.getConf(COLUMNAR_LOGICAL_JOIN_OPTIMIZATION_THROTTLE) - def enableLogicalJoinOptimize: Boolean = - conf.getConf(COLUMNAR_LOGICAL_JOIN_OPTIMIZATION_ENABLED) - def enableScanOnly: Boolean = conf.getConf(COLUMNAR_SCAN_ONLY_ENABLED) def tmpFile: Option[String] = conf.getConf(COLUMNAR_TEMP_DIR) @@ -1007,13 +1004,6 @@ object GlutenConfig { .intConf .createWithDefault(12) - val COLUMNAR_LOGICAL_JOIN_OPTIMIZATION_ENABLED = - buildConf("spark.gluten.sql.columnar.logicalJoinOptimizeEnable") - .internal() - .doc("Enable or disable columnar logicalJoinOptimize.") - .booleanConf - .createWithDefault(false) - val COLUMNAR_SCAN_ONLY_ENABLED = buildConf("spark.gluten.sql.columnar.scanOnly") .internal() diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala deleted file mode 100644 index 20b9dea333a5..000000000000 --- a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.execution - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} - -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join).map { - case ( - joinType, - leftKeys, - rightKeys, - otherPredicates, - predicatesOfJoinKeys, - left, - right, - hint) => - (joinType, leftKeys, rightKeys, otherPredicates, left, right, hint) - } - } - } -} diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala deleted file mode 100644 index 20b9dea333a5..000000000000 --- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.execution - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} - -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join).map { - case ( - joinType, - leftKeys, - rightKeys, - otherPredicates, - predicatesOfJoinKeys, - left, - right, - hint) => - (joinType, leftKeys, rightKeys, otherPredicates, left, right, hint) - } - } - } -} diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala deleted file mode 100644 index 20b9dea333a5..000000000000 --- a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.execution - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} - -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join).map { - case ( - joinType, - leftKeys, - rightKeys, - otherPredicates, - predicatesOfJoinKeys, - left, - right, - hint) => - (joinType, leftKeys, rightKeys, otherPredicates, left, right, hint) - } - } - } -}