diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index fd110c32d828..8aeec14db8f8 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -103,9 +103,8 @@ jobs: path: | /tmp/${{ github.run_id }}/spark32/tpch-approved-plan/** - name: Clean temp golden files - if: failure() run: | - rm -rf /tmp/$GITHUB_RUN_ID/spark32/tpch-approved-plan + rm -rf /tmp/$GITHUB_RUN_ID/spark32 - name: Exit docker container if: ${{ always() }} run: | @@ -236,9 +235,8 @@ jobs: path: | /tmp/${{ github.run_id }}/spark33/tpch-approved-plan/** - name: Clean temp golden files - if: failure() run: | - rm -rf /tmp/$GITHUB_RUN_ID/spark33/tpch-approved-plan + rm -rf /tmp/$GITHUB_RUN_ID/spark33 - name: Exit docker container if: ${{ always() }} run: | @@ -308,6 +306,20 @@ jobs: $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn test -Pspark-3.4 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' + - name: Copy golden files from container to host + if: failure() + run: | + $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/cp.sh /tmp/tpch-approved-plan/ /tmp/$GITHUB_RUN_ID/spark34/tpch-approved-plan + - name: Upload golden files + if: failure() + uses: actions/upload-artifact@v4 + with: + name: golden-files-spark34 + path: | + /tmp/${{ github.run_id }}/spark34/tpch-approved-plan/** + - name: Clean temp golden files + run: | + rm -rf /tmp/$GITHUB_RUN_ID/spark34 - name: Exit docker container if: ${{ always() }} run: | diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/1.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/1.txt new file mode 100644 index 000000000000..634f26c86f24 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/1.txt @@ -0,0 +1,154 @@ +== Physical Plan == +AdaptiveSparkPlan (28) ++- == Final Plan == + VeloxColumnarToRowExec (19) + +- ^ SortExecTransformer (17) + +- ^ InputIteratorTransformer (16) + +- ^ InputAdapter (15) + +- ^ ShuffleQueryStage (14), Statistics(X) + +- ColumnarExchange (13) + +- ^ RegularHashAggregateExecTransformer (11) + +- ^ InputIteratorTransformer (10) + +- ^ InputAdapter (9) + +- ^ ShuffleQueryStage (8), Statistics(X) + +- ColumnarExchange (7) + +- ^ ProjectExecTransformer (5) + +- ^ FlushableHashAggregateExecTransformer (4) + +- ^ ProjectExecTransformer (3) + +- ^ FilterExecTransformer (2) + +- ^ Scan parquet (1) ++- == Initial Plan == + Sort (27) + +- Exchange (26) + +- HashAggregate (25) + +- Exchange (24) + +- HashAggregate (23) + +- Project (22) + +- Filter (21) + +- Scan parquet (20) + + +(1) Scan parquet +Output [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), LessThanOrEqual(l_shipdate,1998-09-02)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Arguments: (isnotnull(l_shipdate#X) AND (l_shipdate#X <= 1998-09-02)) + +(3) ProjectExecTransformer +Output [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X, ((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)) AS _pre_X#X] +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] + +(4) FlushableHashAggregateExecTransformer +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, _pre_X#X, _pre_X#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [partial_sum(l_quantity#X), partial_sum(l_extendedprice#X), partial_sum(_pre_X#X), partial_sum(_pre_X#X), partial_avg(l_quantity#X), partial_avg(l_extendedprice#X), partial_avg(l_discount#X), partial_count(1)] +Aggregate Attributes [15]: [sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Results [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(5) ProjectExecTransformer +Output [18]: [hash(l_returnflag#X, l_linestatus#X, 42) AS hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(6) WholeStageCodegenTransformer (X) +Input [18]: [hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: false + +(7) ColumnarExchange +Input [18]: [hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: hashpartitioning(l_returnflag#X, l_linestatus#X, 1), ENSURE_REQUIREMENTS, [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X], [plan_id=X], [id=#X] + +(8) ShuffleQueryStage +Output [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: X + +(9) InputAdapter +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(10) InputIteratorTransformer +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(11) RegularHashAggregateExecTransformer +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [sum(l_quantity#X), sum(l_extendedprice#X), sum((l_extendedprice#X * (1 - l_discount#X))), sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), avg(l_quantity#X), avg(l_extendedprice#X), avg(l_discount#X), count(1)] +Aggregate Attributes [8]: [sum(l_quantity#X)#X, sum(l_extendedprice#X)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X, avg(l_quantity#X)#X, avg(l_extendedprice#X)#X, avg(l_discount#X)#X, count(1)#X] +Results [10]: [l_returnflag#X, l_linestatus#X, sum(l_quantity#X)#X AS sum_qty#X, sum(l_extendedprice#X)#X AS sum_base_price#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS sum_disc_price#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X AS sum_charge#X, avg(l_quantity#X)#X AS avg_qty#X, avg(l_extendedprice#X)#X AS avg_price#X, avg(l_discount#X)#X AS avg_disc#X, count(1)#X AS count_order#X] + +(12) WholeStageCodegenTransformer (X) +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: false + +(13) ColumnarExchange +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: rangepartitioning(l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: X + +(15) InputAdapter +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] + +(16) InputIteratorTransformer +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] + +(17) SortExecTransformer +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: [l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST], true, 0 + +(18) WholeStageCodegenTransformer (X) +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: false + +(19) VeloxColumnarToRowExec +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] + +(20) Scan parquet +Output [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), LessThanOrEqual(l_shipdate,1998-09-02)] +ReadSchema: struct + +(21) Filter +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Condition : (isnotnull(l_shipdate#X) AND (l_shipdate#X <= 1998-09-02)) + +(22) Project +Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X] +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] + +(23) HashAggregate +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [partial_sum(l_quantity#X), partial_sum(l_extendedprice#X), partial_sum((l_extendedprice#X * (1 - l_discount#X))), partial_sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), partial_avg(l_quantity#X), partial_avg(l_extendedprice#X), partial_avg(l_discount#X), partial_count(1)] +Aggregate Attributes [15]: [sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Results [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(24) Exchange +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: hashpartitioning(l_returnflag#X, l_linestatus#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(25) HashAggregate +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [sum(l_quantity#X), sum(l_extendedprice#X), sum((l_extendedprice#X * (1 - l_discount#X))), sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), avg(l_quantity#X), avg(l_extendedprice#X), avg(l_discount#X), count(1)] +Aggregate Attributes [8]: [sum(l_quantity#X)#X, sum(l_extendedprice#X)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X, avg(l_quantity#X)#X, avg(l_extendedprice#X)#X, avg(l_discount#X)#X, count(1)#X] +Results [10]: [l_returnflag#X, l_linestatus#X, sum(l_quantity#X)#X AS sum_qty#X, sum(l_extendedprice#X)#X AS sum_base_price#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS sum_disc_price#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X AS sum_charge#X, avg(l_quantity#X)#X AS avg_qty#X, avg(l_extendedprice#X)#X AS avg_price#X, avg(l_discount#X)#X AS avg_disc#X, count(1)#X AS count_order#X] + +(26) Exchange +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: rangepartitioning(l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(27) Sort +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: [l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST], true, 0 + +(28) AdaptiveSparkPlan +Output [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/10.txt new file mode 100644 index 000000000000..aceee049fd5b --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/10.txt @@ -0,0 +1,373 @@ +== Physical Plan == +AdaptiveSparkPlan (67) ++- == Final Plan == + VeloxColumnarToRowExec (43) + +- TakeOrderedAndProjectExecTransformer (42) + +- ^ ProjectExecTransformer (40) + +- ^ RegularHashAggregateExecTransformer (39) + +- ^ InputIteratorTransformer (38) + +- ^ InputAdapter (37) + +- ^ ShuffleQueryStage (36), Statistics(X) + +- ColumnarExchange (35) + +- ^ ProjectExecTransformer (33) + +- ^ FlushableHashAggregateExecTransformer (32) + +- ^ ProjectExecTransformer (31) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (30) + :- ^ ProjectExecTransformer (22) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (21) + : :- ^ ProjectExecTransformer (12) + : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + : : :- ^ FilterExecTransformer (2) + : : : +- ^ Scan parquet (1) + : : +- ^ InputIteratorTransformer (10) + : : +- ^ InputAdapter (9) + : : +- ^ BroadcastQueryStage (8), Statistics(X) + : : +- ColumnarBroadcastExchange (7) + : : +- ^ ProjectExecTransformer (5) + : : +- ^ FilterExecTransformer (4) + : : +- ^ Scan parquet (3) + : +- ^ InputIteratorTransformer (20) + : +- ^ InputAdapter (19) + : +- ^ BroadcastQueryStage (18), Statistics(X) + : +- ColumnarBroadcastExchange (17) + : +- ^ ProjectExecTransformer (15) + : +- ^ FilterExecTransformer (14) + : +- ^ Scan parquet (13) + +- ^ InputIteratorTransformer (29) + +- ^ InputAdapter (28) + +- ^ BroadcastQueryStage (27), Statistics(X) + +- ColumnarBroadcastExchange (26) + +- ^ FilterExecTransformer (24) + +- ^ Scan parquet (23) ++- == Initial Plan == + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- BroadcastHashJoin Inner BuildRight (61) + :- Project (57) + : +- BroadcastHashJoin Inner BuildRight (56) + : :- Project (51) + : : +- BroadcastHashJoin Inner BuildRight (50) + : : :- Filter (45) + : : : +- Scan parquet (44) + : : +- BroadcastExchange (49) + : : +- Project (48) + : : +- Filter (47) + : : +- Scan parquet (46) + : +- BroadcastExchange (55) + : +- Project (54) + : +- Filter (53) + : +- Scan parquet (52) + +- BroadcastExchange (60) + +- Filter (59) + +- Scan parquet (58) + + +(1) Scan parquet +Output [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(3) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(4) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(5) ProjectExecTransformer +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(6) WholeStageCodegenTransformer (X) +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: false + +(7) ColumnarBroadcastExchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=X] + +(8) BroadcastQueryStage +Output [2]: [o_orderkey#X, o_custkey#X] +Arguments: X + +(9) InputAdapter +Input [2]: [o_orderkey#X, o_custkey#X] + +(10) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_custkey#X] + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(12) ProjectExecTransformer +Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] + +(13) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] +ReadSchema: struct + +(14) FilterExecTransformer +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Arguments: ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) + +(15) ProjectExecTransformer +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] + +(16) WholeStageCodegenTransformer (X) +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(17) ColumnarBroadcastExchange +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(18) BroadcastQueryStage +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(19) InputAdapter +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(20) InputIteratorTransformer +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(21) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(22) ProjectExecTransformer +Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(23) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(24) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: isnotnull(n_nationkey#X) + +(25) WholeStageCodegenTransformer (X) +Input [2]: [n_nationkey#X, n_name#X] +Arguments: false + +(26) ColumnarBroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(27) BroadcastQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(28) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(29) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(30) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(31) ProjectExecTransformer +Output [10]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] + +(32) FlushableHashAggregateExecTransformer +Input [10]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X, _pre_X#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(33) ProjectExecTransformer +Output [10]: [hash(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 42) AS hash_partition_key#X, c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(34) WholeStageCodegenTransformer (X) +Input [10]: [hash_partition_key#X, c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: false + +(35) ColumnarExchange +Input [10]: [hash_partition_key#X, c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(36) ShuffleQueryStage +Output [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: X + +(37) InputAdapter +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(38) InputIteratorTransformer +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(39) RegularHashAggregateExecTransformer +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [8]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(40) ProjectExecTransformer +Output [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Input [8]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(41) WholeStageCodegenTransformer (X) +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: false + +(42) TakeOrderedAndProjectExecTransformer +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X], 0 + +(43) VeloxColumnarToRowExec +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] + +(44) Scan parquet +Output [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(45) Filter +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(46) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(47) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(48) Project +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(49) BroadcastExchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=X] + +(50) BroadcastHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(51) Project +Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] + +(52) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] +ReadSchema: struct + +(53) Filter +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) + +(54) Project +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] + +(55) BroadcastExchange +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(56) BroadcastHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(57) Project +Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(58) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(59) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : isnotnull(n_nationkey#X) + +(60) BroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(61) BroadcastHashJoin +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(62) Project +Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] + +(63) HashAggregate +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(64) Exchange +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(65) HashAggregate +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] + +(66) TakeOrderedAndProject +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] + +(67) AdaptiveSparkPlan +Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/11.txt new file mode 100644 index 000000000000..bdb7a79c98ac --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/11.txt @@ -0,0 +1,551 @@ +== Physical Plan == +AdaptiveSparkPlan (58) ++- == Final Plan == + VeloxColumnarToRowExec (38) + +- ^ SortExecTransformer (36) + +- ^ InputIteratorTransformer (35) + +- ^ InputAdapter (34) + +- ^ ShuffleQueryStage (33), Statistics(X) + +- ColumnarExchange (32) + +- ^ FilterExecTransformer (30) + +- ^ RegularHashAggregateExecTransformer (29) + +- ^ InputIteratorTransformer (28) + +- ^ InputAdapter (27) + +- ^ ShuffleQueryStage (26), Statistics(X) + +- ColumnarExchange (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (20) + :- ^ ProjectExecTransformer (11) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (10) + : :- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (9) + : +- ^ InputAdapter (8) + : +- ^ BroadcastQueryStage (7), Statistics(X) + : +- ColumnarBroadcastExchange (6) + : +- ^ FilterExecTransformer (4) + : +- ^ Scan parquet (3) + +- ^ InputIteratorTransformer (19) + +- ^ InputAdapter (18) + +- ^ BroadcastQueryStage (17), Statistics(X) + +- ColumnarBroadcastExchange (16) + +- ^ ProjectExecTransformer (14) + +- ^ FilterExecTransformer (13) + +- ^ Scan parquet (12) ++- == Initial Plan == + Sort (57) + +- Exchange (56) + +- Filter (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- Project (51) + +- BroadcastHashJoin Inner BuildRight (50) + :- Project (45) + : +- BroadcastHashJoin Inner BuildRight (44) + : :- Filter (40) + : : +- Scan parquet (39) + : +- BroadcastExchange (43) + : +- Filter (42) + : +- Scan parquet (41) + +- BroadcastExchange (49) + +- Project (48) + +- Filter (47) + +- Scan parquet (46) + + +(1) Scan parquet +Output [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: isnotnull(ps_suppkey#X) + +(3) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(4) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(5) WholeStageCodegenTransformer (X) +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: false + +(6) ColumnarBroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(7) BroadcastQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(8) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(9) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(10) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(11) ProjectExecTransformer +Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(12) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] +ReadSchema: struct + +(13) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) + +(14) ProjectExecTransformer +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(15) WholeStageCodegenTransformer (X) +Input [1]: [n_nationkey#X] +Arguments: false + +(16) ColumnarBroadcastExchange +Input [1]: [n_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(17) BroadcastQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(18) InputAdapter +Input [1]: [n_nationkey#X] + +(19) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(20) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(21) ProjectExecTransformer +Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] +Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(22) FlushableHashAggregateExecTransformer +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, _pre_X#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(23) ProjectExecTransformer +Output [4]: [hash(ps_partkey#X, 42) AS hash_partition_key#X, ps_partkey#X, sum#X, isEmpty#X] +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(24) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, ps_partkey#X, sum#X, isEmpty#X] +Arguments: false + +(25) ColumnarExchange +Input [4]: [hash_partition_key#X, ps_partkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(26) ShuffleQueryStage +Output [3]: [ps_partkey#X, sum#X, isEmpty#X] +Arguments: X + +(27) InputAdapter +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(28) InputIteratorTransformer +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(29) RegularHashAggregateExecTransformer +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] + +(30) FilterExecTransformer +Input [2]: [ps_partkey#X, value#X] +Arguments: (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) + +(31) WholeStageCodegenTransformer (X) +Input [2]: [ps_partkey#X, value#X] +Arguments: false + +(32) ColumnarExchange +Input [2]: [ps_partkey#X, value#X] +Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(33) ShuffleQueryStage +Output [2]: [ps_partkey#X, value#X] +Arguments: X + +(34) InputAdapter +Input [2]: [ps_partkey#X, value#X] + +(35) InputIteratorTransformer +Input [2]: [ps_partkey#X, value#X] + +(36) SortExecTransformer +Input [2]: [ps_partkey#X, value#X] +Arguments: [value#X DESC NULLS LAST], true, 0 + +(37) WholeStageCodegenTransformer (X) +Input [2]: [ps_partkey#X, value#X] +Arguments: false + +(38) VeloxColumnarToRowExec +Input [2]: [ps_partkey#X, value#X] + +(39) Scan parquet +Output [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(40) Filter +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Condition : isnotnull(ps_suppkey#X) + +(41) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(42) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(43) BroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(44) BroadcastHashJoin +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(45) Project +Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(46) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] +ReadSchema: struct + +(47) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) + +(48) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(49) BroadcastExchange +Input [1]: [n_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(50) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(51) Project +Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] +Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(52) HashAggregate +Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(53) Exchange +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(54) HashAggregate +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] + +(55) Filter +Input [2]: [ps_partkey#X, value#X] +Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) + +(56) Exchange +Input [2]: [ps_partkey#X, value#X] +Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(57) Sort +Input [2]: [ps_partkey#X, value#X] +Arguments: [value#X DESC NULLS LAST], true, 0 + +(58) AdaptiveSparkPlan +Output [2]: [ps_partkey#X, value#X] +Arguments: isFinalPlan=true + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 30 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (99) ++- == Final Plan == + VeloxColumnarToRowExec (82) + +- ^ ProjectExecTransformer (80) + +- ^ RegularHashAggregateExecTransformer (79) + +- ^ InputIteratorTransformer (78) + +- ^ InputAdapter (77) + +- ^ ShuffleQueryStage (76), Statistics(X) + +- ColumnarExchange (75) + +- ^ FlushableHashAggregateExecTransformer (73) + +- ^ ProjectExecTransformer (72) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (71) + :- ^ ProjectExecTransformer (66) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (65) + : :- ^ FilterExecTransformer (60) + : : +- ^ Scan parquet (59) + : +- ^ InputIteratorTransformer (64) + : +- ^ InputAdapter (63) + : +- ^ BroadcastQueryStage (62), Statistics(X) + : +- ReusedExchange (61) + +- ^ InputIteratorTransformer (70) + +- ^ InputAdapter (69) + +- ^ BroadcastQueryStage (68), Statistics(X) + +- ReusedExchange (67) ++- == Initial Plan == + HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- BroadcastHashJoin Inner BuildRight (94) + :- Project (89) + : +- BroadcastHashJoin Inner BuildRight (88) + : :- Filter (84) + : : +- Scan parquet (83) + : +- BroadcastExchange (87) + : +- Filter (86) + : +- Scan parquet (85) + +- BroadcastExchange (93) + +- Project (92) + +- Filter (91) + +- Scan parquet (90) + + +(59) Scan parquet +Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(60) FilterExecTransformer +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: isnotnull(ps_suppkey#X) + +(61) ReusedExchange [Reuses operator id: 6] +Output [2]: [s_suppkey#X, s_nationkey#X] + +(62) BroadcastQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(63) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(64) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(65) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(66) ProjectExecTransformer +Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(67) ReusedExchange [Reuses operator id: 16] +Output [1]: [n_nationkey#X] + +(68) BroadcastQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(69) InputAdapter +Input [1]: [n_nationkey#X] + +(70) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(71) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(72) ProjectExecTransformer +Output [3]: [ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] +Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(73) FlushableHashAggregateExecTransformer +Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] +Keys: [] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(74) WholeStageCodegenTransformer (X) +Input [2]: [sum#X, isEmpty#X] +Arguments: false + +(75) ColumnarExchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(76) ShuffleQueryStage +Output [2]: [sum#X, isEmpty#X] +Arguments: X + +(77) InputAdapter +Input [2]: [sum#X, isEmpty#X] + +(78) InputIteratorTransformer +Input [2]: [sum#X, isEmpty#X] + +(79) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] + +(80) ProjectExecTransformer +Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] +Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] + +(81) WholeStageCodegenTransformer (X) +Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] +Arguments: false + +(82) VeloxColumnarToRowExec +Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] + +(83) Scan parquet +Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(84) Filter +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Condition : isnotnull(ps_suppkey#X) + +(85) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(86) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(87) BroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(88) BroadcastHashJoin +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(89) Project +Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(90) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] +ReadSchema: struct + +(91) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) + +(92) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(93) BroadcastExchange +Input [1]: [n_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(94) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(95) Project +Output [2]: [ps_availqty#X, ps_supplycost#X] +Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(96) HashAggregate +Input [2]: [ps_availqty#X, ps_supplycost#X] +Keys: [] +Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(97) Exchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(98) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] + +(99) AdaptiveSparkPlan +Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/12.txt new file mode 100644 index 000000000000..5b48d4460cb2 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/12.txt @@ -0,0 +1,231 @@ +== Physical Plan == +AdaptiveSparkPlan (42) ++- == Final Plan == + VeloxColumnarToRowExec (28) + +- ^ SortExecTransformer (26) + +- ^ InputIteratorTransformer (25) + +- ^ InputAdapter (24) + +- ^ ShuffleQueryStage (23), Statistics(X) + +- ColumnarExchange (22) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ InputIteratorTransformer (19) + +- ^ InputAdapter (18) + +- ^ ShuffleQueryStage (17), Statistics(X) + +- ColumnarExchange (16) + +- ^ ProjectExecTransformer (14) + +- ^ FlushableHashAggregateExecTransformer (13) + +- ^ ProjectExecTransformer (12) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + :- ^ InputIteratorTransformer (7) + : +- ^ InputAdapter (6) + : +- ^ BroadcastQueryStage (5), Statistics(X) + : +- ColumnarBroadcastExchange (4) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ ProjectExecTransformer (10) + +- ^ FilterExecTransformer (9) + +- ^ Scan parquet (8) ++- == Initial Plan == + Sort (41) + +- Exchange (40) + +- HashAggregate (39) + +- Exchange (38) + +- HashAggregate (37) + +- Project (36) + +- BroadcastHashJoin Inner BuildLeft (35) + :- BroadcastExchange (31) + : +- Filter (30) + : +- Scan parquet (29) + +- Project (34) + +- Filter (33) + +- Scan parquet (32) + + +(1) Scan parquet +Output [2]: [o_orderkey#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: isnotnull(o_orderkey#X) + +(3) WholeStageCodegenTransformer (X) +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: false + +(4) ColumnarBroadcastExchange +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(5) BroadcastQueryStage +Output [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: X + +(6) InputAdapter +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(7) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(8) Scan parquet +Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] +ReadSchema: struct + +(9) FilterExecTransformer +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Arguments: ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) + +(10) ProjectExecTransformer +Output [2]: [l_orderkey#X, l_shipmode#X] +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(12) ProjectExecTransformer +Output [4]: [o_orderpriority#X, l_shipmode#X, CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END AS _pre_X#X, CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END AS _pre_X#X] +Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] + +(13) FlushableHashAggregateExecTransformer +Input [4]: [o_orderpriority#X, l_shipmode#X, _pre_X#X, _pre_X#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, sum#X] +Results [3]: [l_shipmode#X, sum#X, sum#X] + +(14) ProjectExecTransformer +Output [4]: [hash(l_shipmode#X, 42) AS hash_partition_key#X, l_shipmode#X, sum#X, sum#X] +Input [3]: [l_shipmode#X, sum#X, sum#X] + +(15) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_shipmode#X, sum#X, sum#X] +Arguments: false + +(16) ColumnarExchange +Input [4]: [hash_partition_key#X, l_shipmode#X, sum#X, sum#X] +Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [l_shipmode#X, sum#X, sum#X], [plan_id=X], [id=#X] + +(17) ShuffleQueryStage +Output [3]: [l_shipmode#X, sum#X, sum#X] +Arguments: X + +(18) InputAdapter +Input [3]: [l_shipmode#X, sum#X, sum#X] + +(19) InputIteratorTransformer +Input [3]: [l_shipmode#X, sum#X, sum#X] + +(20) RegularHashAggregateExecTransformer +Input [3]: [l_shipmode#X, sum#X, sum#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] +Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] + +(21) WholeStageCodegenTransformer (X) +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: false + +(22) ColumnarExchange +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(23) ShuffleQueryStage +Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: X + +(24) InputAdapter +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] + +(25) InputIteratorTransformer +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] + +(26) SortExecTransformer +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 + +(27) WholeStageCodegenTransformer (X) +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: false + +(28) VeloxColumnarToRowExec +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] + +(29) Scan parquet +Output [2]: [o_orderkey#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(30) Filter +Input [2]: [o_orderkey#X, o_orderpriority#X] +Condition : isnotnull(o_orderkey#X) + +(31) BroadcastExchange +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(32) Scan parquet +Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] +ReadSchema: struct + +(33) Filter +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) + +(34) Project +Output [2]: [l_orderkey#X, l_shipmode#X] +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] + +(35) BroadcastHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(36) Project +Output [2]: [o_orderpriority#X, l_shipmode#X] +Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] + +(37) HashAggregate +Input [2]: [o_orderpriority#X, l_shipmode#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] +Aggregate Attributes [2]: [sum#X, sum#X] +Results [3]: [l_shipmode#X, sum#X, sum#X] + +(38) Exchange +Input [3]: [l_shipmode#X, sum#X, sum#X] +Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(39) HashAggregate +Input [3]: [l_shipmode#X, sum#X, sum#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] +Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] + +(40) Exchange +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(41) Sort +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 + +(42) AdaptiveSparkPlan +Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/13.txt new file mode 100644 index 000000000000..6a05d769c7c3 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/13.txt @@ -0,0 +1,288 @@ +== Physical Plan == +AdaptiveSparkPlan (52) ++- == Final Plan == + VeloxColumnarToRowExec (36) + +- ^ SortExecTransformer (34) + +- ^ InputIteratorTransformer (33) + +- ^ InputAdapter (32) + +- ^ ShuffleQueryStage (31), Statistics(X) + +- ColumnarExchange (30) + +- ^ RegularHashAggregateExecTransformer (28) + +- ^ InputIteratorTransformer (27) + +- ^ InputAdapter (26) + +- ^ ShuffleQueryStage (25), Statistics(X) + +- ColumnarExchange (24) + +- ^ ProjectExecTransformer (22) + +- ^ FlushableHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ InputIteratorTransformer (18) + +- ^ InputAdapter (17) + +- ^ ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- ^ ProjectExecTransformer (13) + +- ^ FlushableHashAggregateExecTransformer (12) + +- ^ ProjectExecTransformer (11) + +- ^ GlutenBroadcastHashJoinExecTransformer LeftOuter (10) + :- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (9) + +- ^ InputAdapter (8) + +- ^ BroadcastQueryStage (7), Statistics(X) + +- ColumnarBroadcastExchange (6) + +- ^ ProjectExecTransformer (4) + +- ^ FilterExecTransformer (3) + +- ^ Scan parquet (2) ++- == Initial Plan == + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- HashAggregate (46) + +- Exchange (45) + +- HashAggregate (44) + +- Project (43) + +- BroadcastHashJoin LeftOuter BuildRight (42) + :- Scan parquet (37) + +- BroadcastExchange (41) + +- Project (40) + +- Filter (39) + +- Scan parquet (38) + + +(1) Scan parquet +Output [1]: [c_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(2) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] +ReadSchema: struct + +(3) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Arguments: ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) + +(4) ProjectExecTransformer +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] + +(5) WholeStageCodegenTransformer (X) +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: false + +(6) ColumnarBroadcastExchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=X] + +(7) BroadcastQueryStage +Output [2]: [o_orderkey#X, o_custkey#X] +Arguments: X + +(8) InputAdapter +Input [2]: [o_orderkey#X, o_custkey#X] + +(9) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_custkey#X] + +(10) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftOuter +Join condition: None + +(11) ProjectExecTransformer +Output [2]: [c_custkey#X, o_orderkey#X] +Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] + +(12) FlushableHashAggregateExecTransformer +Input [2]: [c_custkey#X, o_orderkey#X] +Keys [1]: [c_custkey#X] +Functions [1]: [partial_count(o_orderkey#X)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_custkey#X, count#X] + +(13) ProjectExecTransformer +Output [3]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X, count#X] +Input [2]: [c_custkey#X, count#X] + +(14) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_custkey#X, count#X] +Arguments: false + +(15) ColumnarExchange +Input [3]: [hash_partition_key#X, c_custkey#X, count#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, count#X], [plan_id=X], [id=#X] + +(16) ShuffleQueryStage +Output [2]: [c_custkey#X, count#X] +Arguments: X + +(17) InputAdapter +Input [2]: [c_custkey#X, count#X] + +(18) InputIteratorTransformer +Input [2]: [c_custkey#X, count#X] + +(19) RegularHashAggregateExecTransformer +Input [2]: [c_custkey#X, count#X] +Keys [1]: [c_custkey#X] +Functions [1]: [count(o_orderkey#X)] +Aggregate Attributes [1]: [count(o_orderkey#X)#X] +Results [2]: [c_custkey#X, count(o_orderkey#X)#X] + +(20) ProjectExecTransformer +Output [1]: [count(o_orderkey#X)#X AS c_count#X] +Input [2]: [c_custkey#X, count(o_orderkey#X)#X] + +(21) FlushableHashAggregateExecTransformer +Input [1]: [c_count#X] +Keys [1]: [c_count#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_count#X, count#X] + +(22) ProjectExecTransformer +Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] +Input [2]: [c_count#X, count#X] + +(23) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_count#X, count#X] +Arguments: false + +(24) ColumnarExchange +Input [3]: [hash_partition_key#X, c_count#X, count#X] +Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [id=#X] + +(25) ShuffleQueryStage +Output [2]: [c_count#X, count#X] +Arguments: X + +(26) InputAdapter +Input [2]: [c_count#X, count#X] + +(27) InputIteratorTransformer +Input [2]: [c_count#X, count#X] + +(28) RegularHashAggregateExecTransformer +Input [2]: [c_count#X, count#X] +Keys [1]: [c_count#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [c_count#X, count(1)#X AS custdist#X] + +(29) WholeStageCodegenTransformer (X) +Input [2]: [c_count#X, custdist#X] +Arguments: false + +(30) ColumnarExchange +Input [2]: [c_count#X, custdist#X] +Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(31) ShuffleQueryStage +Output [2]: [c_count#X, custdist#X] +Arguments: X + +(32) InputAdapter +Input [2]: [c_count#X, custdist#X] + +(33) InputIteratorTransformer +Input [2]: [c_count#X, custdist#X] + +(34) SortExecTransformer +Input [2]: [c_count#X, custdist#X] +Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 + +(35) WholeStageCodegenTransformer (X) +Input [2]: [c_count#X, custdist#X] +Arguments: false + +(36) VeloxColumnarToRowExec +Input [2]: [c_count#X, custdist#X] + +(37) Scan parquet +Output [1]: [c_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(38) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] +ReadSchema: struct + +(39) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) + +(40) Project +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] + +(41) BroadcastExchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=X] + +(42) BroadcastHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftOuter +Join condition: None + +(43) Project +Output [2]: [c_custkey#X, o_orderkey#X] +Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] + +(44) HashAggregate +Input [2]: [c_custkey#X, o_orderkey#X] +Keys [1]: [c_custkey#X] +Functions [1]: [partial_count(o_orderkey#X)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_custkey#X, count#X] + +(45) Exchange +Input [2]: [c_custkey#X, count#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(46) HashAggregate +Input [2]: [c_custkey#X, count#X] +Keys [1]: [c_custkey#X] +Functions [1]: [count(o_orderkey#X)] +Aggregate Attributes [1]: [count(o_orderkey#X)#X] +Results [1]: [count(o_orderkey#X)#X AS c_count#X] + +(47) HashAggregate +Input [1]: [c_count#X] +Keys [1]: [c_count#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_count#X, count#X] + +(48) Exchange +Input [2]: [c_count#X, count#X] +Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(49) HashAggregate +Input [2]: [c_count#X, count#X] +Keys [1]: [c_count#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [c_count#X, count(1)#X AS custdist#X] + +(50) Exchange +Input [2]: [c_count#X, custdist#X] +Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(51) Sort +Input [2]: [c_count#X, custdist#X] +Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 + +(52) AdaptiveSparkPlan +Output [2]: [c_count#X, custdist#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/14.txt new file mode 100644 index 000000000000..7ea3990c16ad --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/14.txt @@ -0,0 +1,194 @@ +== Physical Plan == +AdaptiveSparkPlan (34) ++- == Final Plan == + VeloxColumnarToRowExec (22) + +- ^ ProjectExecTransformer (20) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ InputIteratorTransformer (18) + +- ^ InputAdapter (17) + +- ^ ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- ^ FlushableHashAggregateExecTransformer (13) + +- ^ ProjectExecTransformer (12) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + :- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (10) + +- ^ InputAdapter (9) + +- ^ BroadcastQueryStage (8), Statistics(X) + +- ColumnarBroadcastExchange (7) + +- ^ FilterExecTransformer (5) + +- ^ Scan parquet (4) ++- == Initial Plan == + HashAggregate (33) + +- Exchange (32) + +- HashAggregate (31) + +- Project (30) + +- BroadcastHashJoin Inner BuildRight (29) + :- Project (25) + : +- Filter (24) + : +- Scan parquet (23) + +- BroadcastExchange (28) + +- Filter (27) + +- Scan parquet (26) + + +(1) Scan parquet +Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) + +(3) ProjectExecTransformer +Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(4) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_partkey)] +ReadSchema: struct + +(5) FilterExecTransformer +Input [2]: [p_partkey#X, p_type#X] +Arguments: isnotnull(p_partkey#X) + +(6) WholeStageCodegenTransformer (X) +Input [2]: [p_partkey#X, p_type#X] +Arguments: false + +(7) ColumnarBroadcastExchange +Input [2]: [p_partkey#X, p_type#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(8) BroadcastQueryStage +Output [2]: [p_partkey#X, p_type#X] +Arguments: X + +(9) InputAdapter +Input [2]: [p_partkey#X, p_type#X] + +(10) InputIteratorTransformer +Input [2]: [p_partkey#X, p_type#X] + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(12) ProjectExecTransformer +Output [5]: [l_extendedprice#X, l_discount#X, p_type#X, CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END AS _pre_X#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] + +(13) FlushableHashAggregateExecTransformer +Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] +Keys: [] +Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] + +(14) WholeStageCodegenTransformer (X) +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: false + +(15) ColumnarExchange +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(16) ShuffleQueryStage +Output [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: X + +(17) InputAdapter +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] + +(18) InputIteratorTransformer +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] + +(19) RegularHashAggregateExecTransformer +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys: [] +Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(20) ProjectExecTransformer +Output [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] +Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(21) WholeStageCodegenTransformer (X) +Input [1]: [promo_revenue#X] +Arguments: false + +(22) VeloxColumnarToRowExec +Input [1]: [promo_revenue#X] + +(23) Scan parquet +Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] +ReadSchema: struct + +(24) Filter +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) + +(25) Project +Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(26) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_partkey)] +ReadSchema: struct + +(27) Filter +Input [2]: [p_partkey#X, p_type#X] +Condition : isnotnull(p_partkey#X) + +(28) BroadcastExchange +Input [2]: [p_partkey#X, p_type#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(29) BroadcastHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(30) Project +Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] +Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] + +(31) HashAggregate +Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] +Keys: [] +Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] + +(32) Exchange +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(33) HashAggregate +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys: [] +Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] + +(34) AdaptiveSparkPlan +Output [1]: [promo_revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt new file mode 100644 index 000000000000..5d54df34b843 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt @@ -0,0 +1,386 @@ +== Physical Plan == +AdaptiveSparkPlan (41) ++- == Final Plan == + VeloxColumnarToRowExec (26) + +- AQEShuffleRead (25) + +- ShuffleQueryStage (24), Statistics(X) + +- ColumnarExchange (23) + +- ^ ProjectExecTransformer (21) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (20) + :- ^ InputIteratorTransformer (7) + : +- ^ InputAdapter (6) + : +- ^ BroadcastQueryStage (5), Statistics(X) + : +- ColumnarBroadcastExchange (4) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ FilterExecTransformer (19) + +- ^ RegularHashAggregateExecTransformer (18) + +- ^ InputIteratorTransformer (17) + +- ^ InputAdapter (16) + +- ^ ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- ^ ProjectExecTransformer (12) + +- ^ FlushableHashAggregateExecTransformer (11) + +- ^ ProjectExecTransformer (10) + +- ^ FilterExecTransformer (9) + +- ^ Scan parquet (8) ++- == Initial Plan == + Sort (40) + +- Exchange (39) + +- Project (38) + +- BroadcastHashJoin Inner BuildLeft (37) + :- BroadcastExchange (29) + : +- Filter (28) + : +- Scan parquet (27) + +- Filter (36) + +- HashAggregate (35) + +- Exchange (34) + +- HashAggregate (33) + +- Project (32) + +- Filter (31) + +- Scan parquet (30) + + +(1) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: isnotnull(s_suppkey#X) + +(3) WholeStageCodegenTransformer (X) +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: false + +(4) ColumnarBroadcastExchange +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(5) BroadcastQueryStage +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: X + +(6) InputAdapter +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] + +(7) InputIteratorTransformer +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] + +(8) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] +ReadSchema: struct + +(9) FilterExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) + +(10) ProjectExecTransformer +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(11) FlushableHashAggregateExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(12) ProjectExecTransformer +Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(13) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: false + +(14) ColumnarExchange +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(15) ShuffleQueryStage +Output [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: X + +(16) InputAdapter +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(17) InputIteratorTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(18) RegularHashAggregateExecTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] + +(19) FilterExecTransformer +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) + +(20) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [supplier_no#X] +Join type: Inner +Join condition: None + +(21) ProjectExecTransformer +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] + +(22) WholeStageCodegenTransformer (X) +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: false + +(23) ColumnarExchange +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(24) ShuffleQueryStage +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: X + +(25) AQEShuffleRead +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: local + +(26) VeloxColumnarToRowExec +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] + +(27) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey)] +ReadSchema: struct + +(28) Filter +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Condition : isnotnull(s_suppkey#X) + +(29) BroadcastExchange +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(30) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] +ReadSchema: struct + +(31) Filter +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) + +(32) Project +Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(33) HashAggregate +Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(34) Exchange +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(35) HashAggregate +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] + +(36) Filter +Input [2]: [supplier_no#X, total_revenue#X] +Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) + +(37) BroadcastHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [supplier_no#X] +Join type: Inner +Join condition: None + +(38) Project +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] + +(39) Exchange +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(40) Sort +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 + +(41) AdaptiveSparkPlan +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: isFinalPlan=true + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 19 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (66) ++- == Final Plan == + VeloxColumnarToRowExec (57) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ RegularHashAggregateExecTransformer (54) + +- ^ ProjectExecTransformer (53) + +- ^ RegularHashAggregateExecTransformer (52) + +- ^ InputIteratorTransformer (51) + +- ^ InputAdapter (50) + +- ^ ShuffleQueryStage (49), Statistics(X) + +- ColumnarExchange (48) + +- ^ ProjectExecTransformer (46) + +- ^ FlushableHashAggregateExecTransformer (45) + +- ^ ProjectExecTransformer (44) + +- ^ FilterExecTransformer (43) + +- ^ Scan parquet (42) ++- == Initial Plan == + HashAggregate (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Exchange (62) + +- HashAggregate (61) + +- Project (60) + +- Filter (59) + +- Scan parquet (58) + + +(42) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] +ReadSchema: struct + +(43) FilterExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) + +(44) ProjectExecTransformer +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(45) FlushableHashAggregateExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(46) ProjectExecTransformer +Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(47) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: false + +(48) ColumnarExchange +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(49) ShuffleQueryStage +Output [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: X + +(50) InputAdapter +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(51) InputIteratorTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(52) RegularHashAggregateExecTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(53) ProjectExecTransformer +Output [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] +Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(54) RegularHashAggregateExecTransformer +Input [1]: [total_revenue#X] +Keys: [] +Functions [1]: [partial_max(total_revenue#X)] +Aggregate Attributes [1]: [max#X] +Results [1]: [max#X] + +(55) RegularHashAggregateExecTransformer +Input [1]: [max#X] +Keys: [] +Functions [1]: [max(total_revenue#X)] +Aggregate Attributes [1]: [max(total_revenue#X)#X] +Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] + +(56) WholeStageCodegenTransformer (X) +Input [1]: [max(total_revenue)#X] +Arguments: false + +(57) VeloxColumnarToRowExec +Input [1]: [max(total_revenue)#X] + +(58) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] +ReadSchema: struct + +(59) Filter +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) + +(60) Project +Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(61) HashAggregate +Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(62) Exchange +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(63) HashAggregate +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] + +(64) HashAggregate +Input [1]: [total_revenue#X] +Keys: [] +Functions [1]: [partial_max(total_revenue#X)] +Aggregate Attributes [1]: [max#X] +Results [1]: [max#X] + +(65) HashAggregate +Input [1]: [max#X] +Keys: [] +Functions [1]: [max(total_revenue#X)] +Aggregate Attributes [1]: [max(total_revenue#X)#X] +Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] + +(66) AdaptiveSparkPlan +Output [1]: [max(total_revenue)#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/16.txt new file mode 100644 index 000000000000..9b4e84b1b169 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/16.txt @@ -0,0 +1,315 @@ +== Physical Plan == +AdaptiveSparkPlan (56) ++- == Final Plan == + VeloxColumnarToRowExec (35) + +- ^ SortExecTransformer (33) + +- ^ InputIteratorTransformer (32) + +- ^ InputAdapter (31) + +- ^ ShuffleQueryStage (30), Statistics(X) + +- ColumnarExchange (29) + +- ^ RegularHashAggregateExecTransformer (27) + +- ^ InputIteratorTransformer (26) + +- ^ InputAdapter (25) + +- ^ ShuffleQueryStage (24), Statistics(X) + +- ColumnarExchange (23) + +- ^ ProjectExecTransformer (21) + +- ^ FlushableHashAggregateExecTransformer (20) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ InputIteratorTransformer (18) + +- ^ InputAdapter (17) + +- ^ ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- ^ ProjectExecTransformer (13) + +- ^ FlushableHashAggregateExecTransformer (12) + +- ^ ProjectExecTransformer (11) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (10) + :- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (9) + +- ^ InputAdapter (8) + +- ^ BroadcastQueryStage (7), Statistics(X) + +- ColumnarBroadcastExchange (6) + +- ^ FilterExecTransformer (4) + +- ^ Scan parquet (3) ++- == Initial Plan == + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Exchange (49) + +- HashAggregate (48) + +- Project (47) + +- BroadcastHashJoin Inner BuildRight (46) + :- BroadcastHashJoin LeftAnti BuildRight (42) + : :- Filter (37) + : : +- Scan parquet (36) + : +- BroadcastExchange (41) + : +- Project (40) + : +- Filter (39) + : +- Scan parquet (38) + +- BroadcastExchange (45) + +- Filter (44) + +- Scan parquet (43) + + +(1) Scan parquet +Output [2]: [ps_partkey#X, ps_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: isnotnull(ps_partkey#X) + +(3) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] +ReadSchema: struct + +(4) FilterExecTransformer +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) + +(5) WholeStageCodegenTransformer (X) +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: false + +(6) ColumnarBroadcastExchange +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(7) BroadcastQueryStage +Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: X + +(8) InputAdapter +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(9) InputIteratorTransformer +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(10) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(11) ProjectExecTransformer +Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(12) FlushableHashAggregateExecTransformer +Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(13) ProjectExecTransformer +Output [5]: [hash(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 42) AS hash_partition_key#X, p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(14) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: false + +(15) ColumnarExchange +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [p_brand#X, p_type#X, p_size#X, ps_suppkey#X], [plan_id=X], [id=#X] + +(16) ShuffleQueryStage +Output [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: X + +(17) InputAdapter +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(18) InputIteratorTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(19) RegularHashAggregateExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(20) FlushableHashAggregateExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [partial_count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(21) ProjectExecTransformer +Output [5]: [hash(p_brand#X, p_type#X, p_size#X, 42) AS hash_partition_key#X, p_brand#X, p_type#X, p_size#X, count#X] +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(22) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, count#X] +Arguments: false + +(23) ColumnarExchange +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, count#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [p_brand#X, p_type#X, p_size#X, count#X], [plan_id=X], [id=#X] + +(24) ShuffleQueryStage +Output [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Arguments: X + +(25) InputAdapter +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(26) InputIteratorTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(27) RegularHashAggregateExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] + +(28) WholeStageCodegenTransformer (X) +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: false + +(29) ColumnarExchange +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(30) ShuffleQueryStage +Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: X + +(31) InputAdapter +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] + +(32) InputIteratorTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] + +(33) SortExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 + +(34) WholeStageCodegenTransformer (X) +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: false + +(35) VeloxColumnarToRowExec +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] + +(36) Scan parquet +Output [2]: [ps_partkey#X, ps_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_partkey)] +ReadSchema: struct + +(37) Filter +Input [2]: [ps_partkey#X, ps_suppkey#X] +Condition : isnotnull(ps_partkey#X) + +(38) Scan parquet +Output [2]: [s_suppkey#X, s_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_comment)] +ReadSchema: struct + +(39) Filter +Input [2]: [s_suppkey#X, s_comment#X] +Condition : (isnotnull(s_comment#X) AND s_comment#X LIKE %Customer%Complaints%) + +(40) Project +Output [1]: [s_suppkey#X] +Input [2]: [s_suppkey#X, s_comment#X] + +(41) BroadcastExchange +Input [1]: [s_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),true), [plan_id=X] + +(42) BroadcastHashJoin +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: LeftAnti +Join condition: None + +(43) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] +ReadSchema: struct + +(44) Filter +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) + +(45) BroadcastExchange +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(46) BroadcastHashJoin +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(47) Project +Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(48) HashAggregate +Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(49) Exchange +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(50) HashAggregate +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(51) HashAggregate +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [partial_count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(52) Exchange +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(53) HashAggregate +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] + +(54) Exchange +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(55) Sort +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 + +(56) AdaptiveSparkPlan +Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/17.txt new file mode 100644 index 000000000000..4a2b5a1744e0 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/17.txt @@ -0,0 +1,202 @@ +== Physical Plan == +AdaptiveSparkPlan (35) ++- == Final Plan == + VeloxColumnarToRowExec (14) + +- ^ ProjectExecTransformer (12) + +- ^ RegularHashAggregateExecTransformer (11) + +- ^ InputIteratorTransformer (10) + +- ^ InputAdapter (9) + +- ^ ShuffleQueryStage (8), Statistics(X) + +- ColumnarExchange (7) + +- ^ FlushableHashAggregateExecTransformer (5) + +- ^ InputIteratorTransformer (4) + +- ^ InputAdapter (3) + +- ^ RowToVeloxColumnar (2) + +- ^ LocalTableScan (1) ++- == Initial Plan == + HashAggregate (34) + +- Exchange (33) + +- HashAggregate (32) + +- Project (31) + +- BroadcastHashJoin Inner BuildRight (30) + :- Project (22) + : +- BroadcastHashJoin Inner BuildRight (21) + : :- Filter (16) + : : +- Scan parquet (15) + : +- BroadcastExchange (20) + : +- Project (19) + : +- Filter (18) + : +- Scan parquet (17) + +- BroadcastExchange (29) + +- Filter (28) + +- HashAggregate (27) + +- Exchange (26) + +- HashAggregate (25) + +- Filter (24) + +- Scan parquet (23) + + +(1) LocalTableScan +Output [1]: [l_extendedprice#X] +Arguments: , [l_extendedprice#X] + +(2) RowToVeloxColumnar +Input [1]: [l_extendedprice#X] + +(3) InputAdapter +Input [1]: [l_extendedprice#X] + +(4) InputIteratorTransformer +Input [1]: [l_extendedprice#X] + +(5) FlushableHashAggregateExecTransformer +Input [1]: [l_extendedprice#X] +Keys: [] +Functions [1]: [partial_sum(l_extendedprice#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(6) WholeStageCodegenTransformer (X) +Input [2]: [sum#X, isEmpty#X] +Arguments: false + +(7) ColumnarExchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(8) ShuffleQueryStage +Output [2]: [sum#X, isEmpty#X] +Arguments: X + +(9) InputAdapter +Input [2]: [sum#X, isEmpty#X] + +(10) InputIteratorTransformer +Input [2]: [sum#X, isEmpty#X] + +(11) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum(l_extendedprice#X)] +Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] +Results [1]: [sum(l_extendedprice#X)#X] + +(12) ProjectExecTransformer +Output [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] +Input [1]: [sum(l_extendedprice#X)#X] + +(13) WholeStageCodegenTransformer (X) +Input [1]: [avg_yearly#X] +Arguments: false + +(14) VeloxColumnarToRowExec +Input [1]: [avg_yearly#X] + +(15) Scan parquet +Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] +ReadSchema: struct + +(16) Filter +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) + +(17) Scan parquet +Output [3]: [p_partkey#X, p_brand#X, p_container#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] +ReadSchema: struct + +(18) Filter +Input [3]: [p_partkey#X, p_brand#X, p_container#X] +Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) + +(19) Project +Output [1]: [p_partkey#X] +Input [3]: [p_partkey#X, p_brand#X, p_container#X] + +(20) BroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(21) BroadcastHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(22) Project +Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] + +(23) Scan parquet +Output [2]: [l_partkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey)] +ReadSchema: struct + +(24) Filter +Input [2]: [l_partkey#X, l_quantity#X] +Condition : isnotnull(l_partkey#X) + +(25) HashAggregate +Input [2]: [l_partkey#X, l_quantity#X] +Keys [1]: [l_partkey#X] +Functions [1]: [partial_avg(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, count#X] +Results [3]: [l_partkey#X, sum#X, count#X] + +(26) Exchange +Input [3]: [l_partkey#X, sum#X, count#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(27) HashAggregate +Input [3]: [l_partkey#X, sum#X, count#X] +Keys [1]: [l_partkey#X] +Functions [1]: [avg(l_quantity#X)] +Aggregate Attributes [1]: [avg(l_quantity#X)#X] +Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] + +(28) Filter +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Condition : isnotnull((0.2 * avg(l_quantity))#X) + +(29) BroadcastExchange +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=X] + +(30) BroadcastHashJoin +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) + +(31) Project +Output [1]: [l_extendedprice#X] +Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] + +(32) HashAggregate +Input [1]: [l_extendedprice#X] +Keys: [] +Functions [1]: [partial_sum(l_extendedprice#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(33) Exchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(34) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum(l_extendedprice#X)] +Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] +Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] + +(35) AdaptiveSparkPlan +Output [1]: [avg_yearly#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/18.txt new file mode 100644 index 000000000000..15aa10fc24d9 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/18.txt @@ -0,0 +1,484 @@ +== Physical Plan == +AdaptiveSparkPlan (86) ++- == Final Plan == + VeloxColumnarToRowExec (53) + +- TakeOrderedAndProjectExecTransformer (52) + +- ^ RegularHashAggregateExecTransformer (50) + +- ^ InputIteratorTransformer (49) + +- ^ InputAdapter (48) + +- ^ ShuffleQueryStage (47), Statistics(X) + +- ColumnarExchange (46) + +- ^ ProjectExecTransformer (44) + +- ^ FlushableHashAggregateExecTransformer (43) + +- ^ ProjectExecTransformer (42) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (41) + :- ^ ProjectExecTransformer (28) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (27) + : :- ^ InputIteratorTransformer (7) + : : +- ^ InputAdapter (6) + : : +- ^ BroadcastQueryStage (5), Statistics(X) + : : +- ColumnarBroadcastExchange (4) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ GlutenBroadcastHashJoinExecTransformer LeftSemi (26) + : :- ^ FilterExecTransformer (9) + : : +- ^ Scan parquet (8) + : +- ^ InputIteratorTransformer (25) + : +- ^ InputAdapter (24) + : +- ^ BroadcastQueryStage (23), Statistics(X) + : +- ColumnarBroadcastExchange (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ FilterExecTransformer (19) + : +- ^ RegularHashAggregateExecTransformer (18) + : +- ^ InputIteratorTransformer (17) + : +- ^ InputAdapter (16) + : +- ^ ShuffleQueryStage (15), Statistics(X) + : +- ColumnarExchange (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FlushableHashAggregateExecTransformer (11) + : +- ^ Scan parquet (10) + +- ^ InputIteratorTransformer (40) + +- ^ InputAdapter (39) + +- ^ BroadcastQueryStage (38), Statistics(X) + +- ColumnarBroadcastExchange (37) + +- ^ GlutenBroadcastHashJoinExecTransformer LeftSemi (35) + :- ^ FilterExecTransformer (30) + : +- ^ Scan parquet (29) + +- ^ InputIteratorTransformer (34) + +- ^ InputAdapter (33) + +- ^ BroadcastQueryStage (32), Statistics(X) + +- ReusedExchange (31) ++- == Initial Plan == + TakeOrderedAndProject (85) + +- HashAggregate (84) + +- Exchange (83) + +- HashAggregate (82) + +- Project (81) + +- BroadcastHashJoin Inner BuildRight (80) + :- Project (68) + : +- BroadcastHashJoin Inner BuildLeft (67) + : :- BroadcastExchange (56) + : : +- Filter (55) + : : +- Scan parquet (54) + : +- BroadcastHashJoin LeftSemi BuildRight (66) + : :- Filter (58) + : : +- Scan parquet (57) + : +- BroadcastExchange (65) + : +- Project (64) + : +- Filter (63) + : +- HashAggregate (62) + : +- Exchange (61) + : +- HashAggregate (60) + : +- Scan parquet (59) + +- BroadcastExchange (79) + +- BroadcastHashJoin LeftSemi BuildRight (78) + :- Filter (70) + : +- Scan parquet (69) + +- BroadcastExchange (77) + +- Project (76) + +- Filter (75) + +- HashAggregate (74) + +- Exchange (73) + +- HashAggregate (72) + +- Scan parquet (71) + + +(1) Scan parquet +Output [2]: [c_custkey#X, c_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [c_custkey#X, c_name#X] +Arguments: isnotnull(c_custkey#X) + +(3) WholeStageCodegenTransformer (X) +Input [2]: [c_custkey#X, c_name#X] +Arguments: false + +(4) ColumnarBroadcastExchange +Input [2]: [c_custkey#X, c_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(5) BroadcastQueryStage +Output [2]: [c_custkey#X, c_name#X] +Arguments: X + +(6) InputAdapter +Input [2]: [c_custkey#X, c_name#X] + +(7) InputIteratorTransformer +Input [2]: [c_custkey#X, c_name#X] + +(8) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(9) FilterExecTransformer +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) + +(10) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(11) FlushableHashAggregateExecTransformer +Input [2]: [l_orderkey#X, l_quantity#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(12) ProjectExecTransformer +Output [4]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, sum#X, isEmpty#X] +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(13) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_orderkey#X, sum#X, isEmpty#X] +Arguments: false + +(14) ColumnarExchange +Input [4]: [hash_partition_key#X, l_orderkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(15) ShuffleQueryStage +Output [3]: [l_orderkey#X, sum#X, isEmpty#X] +Arguments: X + +(16) InputAdapter +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(17) InputIteratorTransformer +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(18) RegularHashAggregateExecTransformer +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] + +(19) FilterExecTransformer +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] +Arguments: (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) + +(20) ProjectExecTransformer +Output [1]: [l_orderkey#X] +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] + +(21) WholeStageCodegenTransformer (X) +Input [1]: [l_orderkey#X] +Arguments: false + +(22) ColumnarBroadcastExchange +Input [1]: [l_orderkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(23) BroadcastQueryStage +Output [1]: [l_orderkey#X] +Arguments: X + +(24) InputAdapter +Input [1]: [l_orderkey#X] + +(25) InputIteratorTransformer +Input [1]: [l_orderkey#X] + +(26) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(27) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(28) ProjectExecTransformer +Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(29) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey)] +ReadSchema: struct + +(30) FilterExecTransformer +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: isnotnull(l_orderkey#X) + +(31) ReusedExchange [Reuses operator id: 22] +Output [1]: [l_orderkey#X] + +(32) BroadcastQueryStage +Output [1]: [l_orderkey#X] +Arguments: X + +(33) InputAdapter +Input [1]: [l_orderkey#X] + +(34) InputIteratorTransformer +Input [1]: [l_orderkey#X] + +(35) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(36) WholeStageCodegenTransformer (X) +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: false + +(37) ColumnarBroadcastExchange +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(38) BroadcastQueryStage +Output [2]: [l_orderkey#X, l_quantity#X] +Arguments: X + +(39) InputAdapter +Input [2]: [l_orderkey#X, l_quantity#X] + +(40) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_quantity#X] + +(41) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(42) ProjectExecTransformer +Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] + +(43) FlushableHashAggregateExecTransformer +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] + +(44) ProjectExecTransformer +Output [8]: [hash(c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, 42) AS hash_partition_key#X, c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] + +(45) WholeStageCodegenTransformer (X) +Input [8]: [hash_partition_key#X, c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Arguments: false + +(46) ColumnarExchange +Input [8]: [hash_partition_key#X, c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, 1), ENSURE_REQUIREMENTS, [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(47) ShuffleQueryStage +Output [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Arguments: X + +(48) InputAdapter +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] + +(49) InputIteratorTransformer +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] + +(50) RegularHashAggregateExecTransformer +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] + +(51) WholeStageCodegenTransformer (X) +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: false + +(52) TakeOrderedAndProjectExecTransformer +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 + +(53) VeloxColumnarToRowExec +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] + +(54) Scan parquet +Output [2]: [c_custkey#X, c_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey)] +ReadSchema: struct + +(55) Filter +Input [2]: [c_custkey#X, c_name#X] +Condition : isnotnull(c_custkey#X) + +(56) BroadcastExchange +Input [2]: [c_custkey#X, c_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(57) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(58) Filter +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) + +(59) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(60) HashAggregate +Input [2]: [l_orderkey#X, l_quantity#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(61) Exchange +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(62) HashAggregate +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] + +(63) Filter +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] +Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) + +(64) Project +Output [1]: [l_orderkey#X] +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] + +(65) BroadcastExchange +Input [1]: [l_orderkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(66) BroadcastHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(67) BroadcastHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(68) Project +Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(69) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey)] +ReadSchema: struct + +(70) Filter +Input [2]: [l_orderkey#X, l_quantity#X] +Condition : isnotnull(l_orderkey#X) + +(71) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(72) HashAggregate +Input [2]: [l_orderkey#X, l_quantity#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(73) Exchange +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(74) HashAggregate +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] + +(75) Filter +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] +Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) + +(76) Project +Output [1]: [l_orderkey#X] +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] + +(77) BroadcastExchange +Input [1]: [l_orderkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(78) BroadcastHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(79) BroadcastExchange +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(80) BroadcastHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(81) Project +Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] + +(82) HashAggregate +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] + +(83) Exchange +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(84) HashAggregate +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] + +(85) TakeOrderedAndProject +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] + +(86) AdaptiveSparkPlan +Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/19.txt new file mode 100644 index 000000000000..73d8a67ae2df --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/19.txt @@ -0,0 +1,189 @@ +== Physical Plan == +AdaptiveSparkPlan (33) ++- == Final Plan == + VeloxColumnarToRowExec (21) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ InputIteratorTransformer (18) + +- ^ InputAdapter (17) + +- ^ ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- ^ FlushableHashAggregateExecTransformer (13) + +- ^ ProjectExecTransformer (12) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + :- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (10) + +- ^ InputAdapter (9) + +- ^ BroadcastQueryStage (8), Statistics(X) + +- ColumnarBroadcastExchange (7) + +- ^ FilterExecTransformer (5) + +- ^ Scan parquet (4) ++- == Initial Plan == + HashAggregate (32) + +- Exchange (31) + +- HashAggregate (30) + +- Project (29) + +- BroadcastHashJoin Inner BuildRight (28) + :- Project (24) + : +- Filter (23) + : +- Scan parquet (22) + +- BroadcastExchange (27) + +- Filter (26) + +- Scan parquet (25) + + +(1) Scan parquet +Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] +ReadSchema: struct + +(2) FilterExecTransformer +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Arguments: ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) + +(3) ProjectExecTransformer +Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] + +(4) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] +ReadSchema: struct + +(5) FilterExecTransformer +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) + +(6) WholeStageCodegenTransformer (X) +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: false + +(7) ColumnarBroadcastExchange +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(8) BroadcastQueryStage +Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: X + +(9) InputAdapter +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(10) InputIteratorTransformer +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) + +(12) ProjectExecTransformer +Output [3]: [l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(13) FlushableHashAggregateExecTransformer +Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] +Keys: [] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(14) WholeStageCodegenTransformer (X) +Input [2]: [sum#X, isEmpty#X] +Arguments: false + +(15) ColumnarExchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(16) ShuffleQueryStage +Output [2]: [sum#X, isEmpty#X] +Arguments: X + +(17) InputAdapter +Input [2]: [sum#X, isEmpty#X] + +(18) InputIteratorTransformer +Input [2]: [sum#X, isEmpty#X] + +(19) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(20) WholeStageCodegenTransformer (X) +Input [1]: [revenue#X] +Arguments: false + +(21) VeloxColumnarToRowExec +Input [1]: [revenue#X] + +(22) Scan parquet +Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] +ReadSchema: struct + +(23) Filter +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) + +(24) Project +Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] + +(25) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] +ReadSchema: struct + +(26) Filter +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) + +(27) BroadcastExchange +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(28) BroadcastHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) + +(29) Project +Output [2]: [l_extendedprice#X, l_discount#X] +Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(30) HashAggregate +Input [2]: [l_extendedprice#X, l_discount#X] +Keys: [] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(31) Exchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(32) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(33) AdaptiveSparkPlan +Output [1]: [revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/20.txt new file mode 100644 index 000000000000..d7e767109907 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/20.txt @@ -0,0 +1,529 @@ +== Physical Plan == +AdaptiveSparkPlan (96) ++- == Final Plan == + VeloxColumnarToRowExec (60) + +- AQEShuffleRead (59) + +- ShuffleQueryStage (58), Statistics(X) + +- ColumnarExchange (57) + +- ^ ProjectExecTransformer (55) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (54) + :- ^ ProjectExecTransformer (45) + : +- ^ GlutenBroadcastHashJoinExecTransformer LeftSemi (44) + : :- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (43) + : +- ^ InputAdapter (42) + : +- ^ BroadcastQueryStage (41), Statistics(X) + : +- ColumnarBroadcastExchange (40) + : +- ^ ProjectExecTransformer (38) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (37) + : :- ^ InputIteratorTransformer (18) + : : +- ^ InputAdapter (17) + : : +- ^ BroadcastQueryStage (16), Statistics(X) + : : +- ColumnarBroadcastExchange (15) + : : +- ^ GlutenBroadcastHashJoinExecTransformer LeftSemi (13) + : : :- ^ FilterExecTransformer (4) + : : : +- ^ Scan parquet (3) + : : +- ^ InputIteratorTransformer (12) + : : +- ^ InputAdapter (11) + : : +- ^ BroadcastQueryStage (10), Statistics(X) + : : +- ColumnarBroadcastExchange (9) + : : +- ^ ProjectExecTransformer (7) + : : +- ^ FilterExecTransformer (6) + : : +- ^ Scan parquet (5) + : +- ^ FilterExecTransformer (36) + : +- ^ ProjectExecTransformer (35) + : +- ^ RegularHashAggregateExecTransformer (34) + : +- ^ InputIteratorTransformer (33) + : +- ^ InputAdapter (32) + : +- ^ ShuffleQueryStage (31), Statistics(X) + : +- ColumnarExchange (30) + : +- ^ ProjectExecTransformer (28) + : +- ^ FlushableHashAggregateExecTransformer (27) + : +- ^ GlutenBroadcastHashJoinExecTransformer LeftSemi (26) + : :- ^ ProjectExecTransformer (21) + : : +- ^ FilterExecTransformer (20) + : : +- ^ Scan parquet (19) + : +- ^ InputIteratorTransformer (25) + : +- ^ InputAdapter (24) + : +- ^ BroadcastQueryStage (23), Statistics(X) + : +- ReusedExchange (22) + +- ^ InputIteratorTransformer (53) + +- ^ InputAdapter (52) + +- ^ BroadcastQueryStage (51), Statistics(X) + +- ColumnarBroadcastExchange (50) + +- ^ ProjectExecTransformer (48) + +- ^ FilterExecTransformer (47) + +- ^ Scan parquet (46) ++- == Initial Plan == + Sort (95) + +- Exchange (94) + +- Project (93) + +- BroadcastHashJoin Inner BuildRight (92) + :- Project (87) + : +- BroadcastHashJoin LeftSemi BuildRight (86) + : :- Filter (62) + : : +- Scan parquet (61) + : +- BroadcastExchange (85) + : +- Project (84) + : +- BroadcastHashJoin Inner BuildLeft (83) + : :- BroadcastExchange (70) + : : +- BroadcastHashJoin LeftSemi BuildRight (69) + : : :- Filter (64) + : : : +- Scan parquet (63) + : : +- BroadcastExchange (68) + : : +- Project (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- BroadcastHashJoin LeftSemi BuildRight (78) + : :- Project (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- BroadcastExchange (77) + : +- Project (76) + : +- Filter (75) + : +- Scan parquet (74) + +- BroadcastExchange (91) + +- Project (90) + +- Filter (89) + +- Scan parquet (88) + + +(1) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: isnotnull(s_nationkey#X) + +(3) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] +ReadSchema: struct + +(4) FilterExecTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) + +(5) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] +ReadSchema: struct + +(6) FilterExecTransformer +Input [2]: [p_partkey#X, p_name#X] +Arguments: (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) + +(7) ProjectExecTransformer +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(8) WholeStageCodegenTransformer (X) +Input [1]: [p_partkey#X] +Arguments: false + +(9) ColumnarBroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(10) BroadcastQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(11) InputAdapter +Input [1]: [p_partkey#X] + +(12) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(13) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(14) WholeStageCodegenTransformer (X) +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: false + +(15) ColumnarBroadcastExchange +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] + +(16) BroadcastQueryStage +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: X + +(17) InputAdapter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(18) InputIteratorTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(19) Scan parquet +Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(20) FilterExecTransformer +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Arguments: ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) + +(21) ProjectExecTransformer +Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] + +(22) ReusedExchange [Reuses operator id: 9] +Output [1]: [p_partkey#X] + +(23) BroadcastQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(24) InputAdapter +Input [1]: [p_partkey#X] + +(25) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(26) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(27) FlushableHashAggregateExecTransformer +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] + +(28) ProjectExecTransformer +Output [5]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] + +(29) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: false + +(30) ColumnarExchange +Input [5]: [hash_partition_key#X, l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(31) ShuffleQueryStage +Output [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: X + +(32) InputAdapter +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] + +(33) InputIteratorTransformer +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] + +(34) RegularHashAggregateExecTransformer +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] + +(35) ProjectExecTransformer +Output [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] + +(36) FilterExecTransformer +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: isnotnull((0.5 * sum(l_quantity))#X) + +(37) GlutenBroadcastHashJoinExecTransformer +Left keys [2]: [ps_partkey#X, ps_suppkey#X] +Right keys [2]: [l_partkey#X, l_suppkey#X] +Join type: Inner +Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) + +(38) ProjectExecTransformer +Output [1]: [ps_suppkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(39) WholeStageCodegenTransformer (X) +Input [1]: [ps_suppkey#X] +Arguments: false + +(40) ColumnarBroadcastExchange +Input [1]: [ps_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(41) BroadcastQueryStage +Output [1]: [ps_suppkey#X] +Arguments: X + +(42) InputAdapter +Input [1]: [ps_suppkey#X] + +(43) InputIteratorTransformer +Input [1]: [ps_suppkey#X] + +(44) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [ps_suppkey#X] +Join type: LeftSemi +Join condition: None + +(45) ProjectExecTransformer +Output [3]: [s_name#X, s_address#X, s_nationkey#X] +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] + +(46) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(47) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) + +(48) ProjectExecTransformer +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(49) WholeStageCodegenTransformer (X) +Input [1]: [n_nationkey#X] +Arguments: false + +(50) ColumnarBroadcastExchange +Input [1]: [n_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(51) BroadcastQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(52) InputAdapter +Input [1]: [n_nationkey#X] + +(53) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(54) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(55) ProjectExecTransformer +Output [2]: [s_name#X, s_address#X] +Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] + +(56) WholeStageCodegenTransformer (X) +Input [2]: [s_name#X, s_address#X] +Arguments: false + +(57) ColumnarExchange +Input [2]: [s_name#X, s_address#X] +Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(58) ShuffleQueryStage +Output [2]: [s_name#X, s_address#X] +Arguments: X + +(59) AQEShuffleRead +Input [2]: [s_name#X, s_address#X] +Arguments: local + +(60) VeloxColumnarToRowExec +Input [2]: [s_name#X, s_address#X] + +(61) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_nationkey)] +ReadSchema: struct + +(62) Filter +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Condition : isnotnull(s_nationkey#X) + +(63) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] +ReadSchema: struct + +(64) Filter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) + +(65) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] +ReadSchema: struct + +(66) Filter +Input [2]: [p_partkey#X, p_name#X] +Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) + +(67) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(68) BroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(69) BroadcastHashJoin +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(70) BroadcastExchange +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] + +(71) Scan parquet +Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(72) Filter +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) + +(73) Project +Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] + +(74) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] +ReadSchema: struct + +(75) Filter +Input [2]: [p_partkey#X, p_name#X] +Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) + +(76) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(77) BroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(78) BroadcastHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(79) HashAggregate +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] + +(80) Exchange +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(81) HashAggregate +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(82) Filter +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Condition : isnotnull((0.5 * sum(l_quantity))#X) + +(83) BroadcastHashJoin +Left keys [2]: [ps_partkey#X, ps_suppkey#X] +Right keys [2]: [l_partkey#X, l_suppkey#X] +Join type: Inner +Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) + +(84) Project +Output [1]: [ps_suppkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(85) BroadcastExchange +Input [1]: [ps_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(86) BroadcastHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [ps_suppkey#X] +Join type: LeftSemi +Join condition: None + +(87) Project +Output [3]: [s_name#X, s_address#X, s_nationkey#X] +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] + +(88) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(89) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) + +(90) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(91) BroadcastExchange +Input [1]: [n_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(92) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(93) Project +Output [2]: [s_name#X, s_address#X] +Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] + +(94) Exchange +Input [2]: [s_name#X, s_address#X] +Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(95) Sort +Input [2]: [s_name#X, s_address#X] +Arguments: [s_name#X ASC NULLS FIRST], true, 0 + +(96) AdaptiveSparkPlan +Output [2]: [s_name#X, s_address#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/21.txt new file mode 100644 index 000000000000..71405c90ff05 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/21.txt @@ -0,0 +1,505 @@ +== Physical Plan == +AdaptiveSparkPlan (91) ++- == Final Plan == + VeloxColumnarToRowExec (58) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ InputIteratorTransformer (55) + +- ^ InputAdapter (54) + +- ^ ShuffleQueryStage (53), Statistics(X) + +- ColumnarExchange (52) + +- ^ ProjectExecTransformer (50) + +- ^ FlushableHashAggregateExecTransformer (49) + +- ^ ProjectExecTransformer (48) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (47) + :- ^ ProjectExecTransformer (38) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (37) + : :- ^ ProjectExecTransformer (28) + : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (27) + : : :- ^ InputIteratorTransformer (7) + : : : +- ^ InputAdapter (6) + : : : +- ^ BroadcastQueryStage (5), Statistics(X) + : : : +- ColumnarBroadcastExchange (4) + : : : +- ^ FilterExecTransformer (2) + : : : +- ^ Scan parquet (1) + : : +- ^ GlutenBroadcastHashJoinExecTransformer LeftAnti (26) + : : :- ^ GlutenBroadcastHashJoinExecTransformer LeftSemi (17) + : : : :- ^ ProjectExecTransformer (10) + : : : : +- ^ FilterExecTransformer (9) + : : : : +- ^ Scan parquet (8) + : : : +- ^ InputIteratorTransformer (16) + : : : +- ^ InputAdapter (15) + : : : +- ^ BroadcastQueryStage (14), Statistics(X) + : : : +- ColumnarBroadcastExchange (13) + : : : +- ^ Scan parquet (11) + : : +- ^ InputIteratorTransformer (25) + : : +- ^ InputAdapter (24) + : : +- ^ BroadcastQueryStage (23), Statistics(X) + : : +- ColumnarBroadcastExchange (22) + : : +- ^ ProjectExecTransformer (20) + : : +- ^ FilterExecTransformer (19) + : : +- ^ Scan parquet (18) + : +- ^ InputIteratorTransformer (36) + : +- ^ InputAdapter (35) + : +- ^ BroadcastQueryStage (34), Statistics(X) + : +- ColumnarBroadcastExchange (33) + : +- ^ ProjectExecTransformer (31) + : +- ^ FilterExecTransformer (30) + : +- ^ Scan parquet (29) + +- ^ InputIteratorTransformer (46) + +- ^ InputAdapter (45) + +- ^ BroadcastQueryStage (44), Statistics(X) + +- ColumnarBroadcastExchange (43) + +- ^ ProjectExecTransformer (41) + +- ^ FilterExecTransformer (40) + +- ^ Scan parquet (39) ++- == Initial Plan == + TakeOrderedAndProject (90) + +- HashAggregate (89) + +- Exchange (88) + +- HashAggregate (87) + +- Project (86) + +- BroadcastHashJoin Inner BuildRight (85) + :- Project (80) + : +- BroadcastHashJoin Inner BuildRight (79) + : :- Project (74) + : : +- BroadcastHashJoin Inner BuildLeft (73) + : : :- BroadcastExchange (61) + : : : +- Filter (60) + : : : +- Scan parquet (59) + : : +- BroadcastHashJoin LeftAnti BuildRight (72) + : : :- BroadcastHashJoin LeftSemi BuildRight (67) + : : : :- Project (64) + : : : : +- Filter (63) + : : : : +- Scan parquet (62) + : : : +- BroadcastExchange (66) + : : : +- Scan parquet (65) + : : +- BroadcastExchange (71) + : : +- Project (70) + : : +- Filter (69) + : : +- Scan parquet (68) + : +- BroadcastExchange (78) + : +- Project (77) + : +- Filter (76) + : +- Scan parquet (75) + +- BroadcastExchange (84) + +- Project (83) + +- Filter (82) + +- Scan parquet (81) + + +(1) Scan parquet +Output [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(3) WholeStageCodegenTransformer (X) +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: false + +(4) ColumnarBroadcastExchange +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(5) BroadcastQueryStage +Output [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: X + +(6) InputAdapter +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] + +(7) InputIteratorTransformer +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] + +(8) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(9) FilterExecTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Arguments: ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(10) ProjectExecTransformer +Output [2]: [l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(11) Scan parquet +Output [2]: [l_orderkey#X, l_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(12) WholeStageCodegenTransformer (X) +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: false + +(13) ColumnarBroadcastExchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(14) BroadcastQueryStage +Output [2]: [l_orderkey#X, l_suppkey#X] +Arguments: X + +(15) InputAdapter +Input [2]: [l_orderkey#X, l_suppkey#X] + +(16) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_suppkey#X] + +(17) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(18) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] +ReadSchema: struct + +(19) FilterExecTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Arguments: ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) + +(20) ProjectExecTransformer +Output [2]: [l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(21) WholeStageCodegenTransformer (X) +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: false + +(22) ColumnarBroadcastExchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(23) BroadcastQueryStage +Output [2]: [l_orderkey#X, l_suppkey#X] +Arguments: X + +(24) InputAdapter +Input [2]: [l_orderkey#X, l_suppkey#X] + +(25) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_suppkey#X] + +(26) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftAnti +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(27) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(28) ProjectExecTransformer +Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] + +(29) Scan parquet +Output [2]: [o_orderkey#X, o_orderstatus#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] +ReadSchema: struct + +(30) FilterExecTransformer +Input [2]: [o_orderkey#X, o_orderstatus#X] +Arguments: ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) + +(31) ProjectExecTransformer +Output [1]: [o_orderkey#X] +Input [2]: [o_orderkey#X, o_orderstatus#X] + +(32) WholeStageCodegenTransformer (X) +Input [1]: [o_orderkey#X] +Arguments: false + +(33) ColumnarBroadcastExchange +Input [1]: [o_orderkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(34) BroadcastQueryStage +Output [1]: [o_orderkey#X] +Arguments: X + +(35) InputAdapter +Input [1]: [o_orderkey#X] + +(36) InputIteratorTransformer +Input [1]: [o_orderkey#X] + +(37) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(38) ProjectExecTransformer +Output [2]: [s_name#X, s_nationkey#X] +Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] + +(39) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(40) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) + +(41) ProjectExecTransformer +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(42) WholeStageCodegenTransformer (X) +Input [1]: [n_nationkey#X] +Arguments: false + +(43) ColumnarBroadcastExchange +Input [1]: [n_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(44) BroadcastQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(45) InputAdapter +Input [1]: [n_nationkey#X] + +(46) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(47) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [1]: [s_name#X] +Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] + +(49) FlushableHashAggregateExecTransformer +Input [1]: [s_name#X] +Keys [1]: [s_name#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [s_name#X, count#X] + +(50) ProjectExecTransformer +Output [3]: [hash(s_name#X, 42) AS hash_partition_key#X, s_name#X, count#X] +Input [2]: [s_name#X, count#X] + +(51) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_name#X, count#X] +Arguments: false + +(52) ColumnarExchange +Input [3]: [hash_partition_key#X, s_name#X, count#X] +Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [s_name#X, count#X], [plan_id=X], [id=#X] + +(53) ShuffleQueryStage +Output [2]: [s_name#X, count#X] +Arguments: X + +(54) InputAdapter +Input [2]: [s_name#X, count#X] + +(55) InputIteratorTransformer +Input [2]: [s_name#X, count#X] + +(56) RegularHashAggregateExecTransformer +Input [2]: [s_name#X, count#X] +Keys [1]: [s_name#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [s_name#X, count(1)#X AS numwait#X] + +(57) WholeStageCodegenTransformer (X) +Input [2]: [s_name#X, numwait#X] +Arguments: false + +(58) VeloxColumnarToRowExec +Input [2]: [s_name#X, numwait#X] + +(59) Scan parquet +Output [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(60) Filter +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(61) BroadcastExchange +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(62) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(63) Filter +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(64) Project +Output [2]: [l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(65) Scan parquet +Output [2]: [l_orderkey#X, l_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(66) BroadcastExchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(67) BroadcastHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(68) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] +ReadSchema: struct + +(69) Filter +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) + +(70) Project +Output [2]: [l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(71) BroadcastExchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(72) BroadcastHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftAnti +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(73) BroadcastHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(74) Project +Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] + +(75) Scan parquet +Output [2]: [o_orderkey#X, o_orderstatus#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] +ReadSchema: struct + +(76) Filter +Input [2]: [o_orderkey#X, o_orderstatus#X] +Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) + +(77) Project +Output [1]: [o_orderkey#X] +Input [2]: [o_orderkey#X, o_orderstatus#X] + +(78) BroadcastExchange +Input [1]: [o_orderkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(79) BroadcastHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(80) Project +Output [2]: [s_name#X, s_nationkey#X] +Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] + +(81) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(82) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) + +(83) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(84) BroadcastExchange +Input [1]: [n_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(85) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(86) Project +Output [1]: [s_name#X] +Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] + +(87) HashAggregate +Input [1]: [s_name#X] +Keys [1]: [s_name#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [s_name#X, count#X] + +(88) Exchange +Input [2]: [s_name#X, count#X] +Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(89) HashAggregate +Input [2]: [s_name#X, count#X] +Keys [1]: [s_name#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [s_name#X, count(1)#X AS numwait#X] + +(90) TakeOrderedAndProject +Input [2]: [s_name#X, numwait#X] +Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] + +(91) AdaptiveSparkPlan +Output [2]: [s_name#X, numwait#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/22.txt new file mode 100644 index 000000000000..331a85234454 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/22.txt @@ -0,0 +1,345 @@ +== Physical Plan == +AdaptiveSparkPlan (38) ++- == Final Plan == + VeloxColumnarToRowExec (26) + +- ^ SortExecTransformer (24) + +- ^ InputIteratorTransformer (23) + +- ^ InputAdapter (22) + +- ^ ShuffleQueryStage (21), Statistics(X) + +- ColumnarExchange (20) + +- ^ RegularHashAggregateExecTransformer (18) + +- ^ InputIteratorTransformer (17) + +- ^ InputAdapter (16) + +- ^ ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- ^ ProjectExecTransformer (12) + +- ^ FlushableHashAggregateExecTransformer (11) + +- ^ ProjectExecTransformer (10) + +- ^ GlutenBroadcastHashJoinExecTransformer LeftAnti (9) + :- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (8) + +- ^ InputAdapter (7) + +- ^ BroadcastQueryStage (6), Statistics(X) + +- ColumnarBroadcastExchange (5) + +- ^ Scan parquet (3) ++- == Initial Plan == + Sort (37) + +- Exchange (36) + +- HashAggregate (35) + +- Exchange (34) + +- HashAggregate (33) + +- Project (32) + +- BroadcastHashJoin LeftAnti BuildRight (31) + :- Filter (28) + : +- Scan parquet (27) + +- BroadcastExchange (30) + +- Scan parquet (29) + + +(1) Scan parquet +Output [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) AND (cast(c_acctbal#X as decimal(16,6)) > Subquery subquery#X, [id=#X])) + +(3) Scan parquet +Output [1]: [o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(4) WholeStageCodegenTransformer (X) +Input [1]: [o_custkey#X] +Arguments: false + +(5) ColumnarBroadcastExchange +Input [1]: [o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(6) BroadcastQueryStage +Output [1]: [o_custkey#X] +Arguments: X + +(7) InputAdapter +Input [1]: [o_custkey#X] + +(8) InputIteratorTransformer +Input [1]: [o_custkey#X] + +(9) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftAnti +Join condition: None + +(10) ProjectExecTransformer +Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] + +(11) FlushableHashAggregateExecTransformer +Input [2]: [cntrycode#X, c_acctbal#X] +Keys [1]: [cntrycode#X] +Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] +Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] +Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(12) ProjectExecTransformer +Output [5]: [hash(cntrycode#X, 42) AS hash_partition_key#X, cntrycode#X, count#X, sum#X, isEmpty#X] +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(13) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: false + +(14) ColumnarExchange +Input [5]: [hash_partition_key#X, cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [cntrycode#X, count#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(15) ShuffleQueryStage +Output [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: X + +(16) InputAdapter +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(17) InputIteratorTransformer +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(18) RegularHashAggregateExecTransformer +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Keys [1]: [cntrycode#X] +Functions [2]: [count(1), sum(c_acctbal#X)] +Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] +Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] + +(19) WholeStageCodegenTransformer (X) +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: false + +(20) ColumnarExchange +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: X + +(22) InputAdapter +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] + +(23) InputIteratorTransformer +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] + +(24) SortExecTransformer +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 + +(25) WholeStageCodegenTransformer (X) +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: false + +(26) VeloxColumnarToRowExec +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] + +(27) Scan parquet +Output [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal)] +ReadSchema: struct + +(28) Filter +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) AND (cast(c_acctbal#X as decimal(16,6)) > Subquery subquery#X, [id=#X])) + +(29) Scan parquet +Output [1]: [o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(30) BroadcastExchange +Input [1]: [o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(31) BroadcastHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftAnti +Join condition: None + +(32) Project +Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] + +(33) HashAggregate +Input [2]: [cntrycode#X, c_acctbal#X] +Keys [1]: [cntrycode#X] +Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] +Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] +Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(34) Exchange +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(35) HashAggregate +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Keys [1]: [cntrycode#X] +Functions [2]: [count(1), sum(c_acctbal#X)] +Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] +Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] + +(36) Exchange +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(37) Sort +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 + +(38) AdaptiveSparkPlan +Output [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: isFinalPlan=true + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (57) ++- == Final Plan == + VeloxColumnarToRowExec (50) + +- ^ RegularHashAggregateExecTransformer (48) + +- ^ InputIteratorTransformer (47) + +- ^ InputAdapter (46) + +- ^ ShuffleQueryStage (45), Statistics(X) + +- ColumnarExchange (44) + +- ^ FlushableHashAggregateExecTransformer (42) + +- ^ ProjectExecTransformer (41) + +- ^ FilterExecTransformer (40) + +- ^ Scan parquet (39) ++- == Initial Plan == + HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Project (53) + +- Filter (52) + +- Scan parquet (51) + + +(39) Scan parquet +Output [2]: [c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] +ReadSchema: struct + +(40) FilterExecTransformer +Input [2]: [c_phone#X, c_acctbal#X] +Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) + +(41) ProjectExecTransformer +Output [1]: [c_acctbal#X] +Input [2]: [c_phone#X, c_acctbal#X] + +(42) FlushableHashAggregateExecTransformer +Input [1]: [c_acctbal#X] +Keys: [] +Functions [1]: [partial_avg(c_acctbal#X)] +Aggregate Attributes [2]: [sum#X, count#X] +Results [2]: [sum#X, count#X] + +(43) WholeStageCodegenTransformer (X) +Input [2]: [sum#X, count#X] +Arguments: false + +(44) ColumnarExchange +Input [2]: [sum#X, count#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(45) ShuffleQueryStage +Output [2]: [sum#X, count#X] +Arguments: X + +(46) InputAdapter +Input [2]: [sum#X, count#X] + +(47) InputIteratorTransformer +Input [2]: [sum#X, count#X] + +(48) RegularHashAggregateExecTransformer +Input [2]: [sum#X, count#X] +Keys: [] +Functions [1]: [avg(c_acctbal#X)] +Aggregate Attributes [1]: [avg(c_acctbal#X)#X] +Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] + +(49) WholeStageCodegenTransformer (X) +Input [1]: [avg(c_acctbal)#X] +Arguments: false + +(50) VeloxColumnarToRowExec +Input [1]: [avg(c_acctbal)#X] + +(51) Scan parquet +Output [2]: [c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] +ReadSchema: struct + +(52) Filter +Input [2]: [c_phone#X, c_acctbal#X] +Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) + +(53) Project +Output [1]: [c_acctbal#X] +Input [2]: [c_phone#X, c_acctbal#X] + +(54) HashAggregate +Input [1]: [c_acctbal#X] +Keys: [] +Functions [1]: [partial_avg(c_acctbal#X)] +Aggregate Attributes [2]: [sum#X, count#X] +Results [2]: [sum#X, count#X] + +(55) Exchange +Input [2]: [sum#X, count#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(56) HashAggregate +Input [2]: [sum#X, count#X] +Keys: [] +Functions [1]: [avg(c_acctbal#X)] +Aggregate Attributes [1]: [avg(c_acctbal#X)#X] +Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] + +(57) AdaptiveSparkPlan +Output [1]: [avg(c_acctbal)#X] +Arguments: isFinalPlan=true + +Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (57) ++- == Final Plan == + VeloxColumnarToRowExec (50) + +- ^ RegularHashAggregateExecTransformer (48) + +- ^ InputIteratorTransformer (47) + +- ^ InputAdapter (46) + +- ^ ShuffleQueryStage (45), Statistics(X) + +- ColumnarExchange (44) + +- ^ FlushableHashAggregateExecTransformer (42) + +- ^ ProjectExecTransformer (41) + +- ^ FilterExecTransformer (40) + +- ^ Scan parquet (39) ++- == Initial Plan == + HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Project (53) + +- Filter (52) + +- Scan parquet (51) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/3.txt new file mode 100644 index 000000000000..ad4899f6226d --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/3.txt @@ -0,0 +1,296 @@ +== Physical Plan == +AdaptiveSparkPlan (53) ++- == Final Plan == + VeloxColumnarToRowExec (34) + +- TakeOrderedAndProjectExecTransformer (33) + +- ^ ProjectExecTransformer (31) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ^ InputAdapter (28) + +- ^ ShuffleQueryStage (27), Statistics(X) + +- ColumnarExchange (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ ProjectExecTransformer (22) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (21) + :- ^ ProjectExecTransformer (12) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + : :- ^ InputIteratorTransformer (8) + : : +- ^ InputAdapter (7) + : : +- ^ BroadcastQueryStage (6), Statistics(X) + : : +- ColumnarBroadcastExchange (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ FilterExecTransformer (10) + : +- ^ Scan parquet (9) + +- ^ InputIteratorTransformer (20) + +- ^ InputAdapter (19) + +- ^ BroadcastQueryStage (18), Statistics(X) + +- ColumnarBroadcastExchange (17) + +- ^ ProjectExecTransformer (15) + +- ^ FilterExecTransformer (14) + +- ^ Scan parquet (13) ++- == Initial Plan == + TakeOrderedAndProject (52) + +- HashAggregate (51) + +- Exchange (50) + +- HashAggregate (49) + +- Project (48) + +- BroadcastHashJoin Inner BuildRight (47) + :- Project (42) + : +- BroadcastHashJoin Inner BuildLeft (41) + : :- BroadcastExchange (38) + : : +- Project (37) + : : +- Filter (36) + : : +- Scan parquet (35) + : +- Filter (40) + : +- Scan parquet (39) + +- BroadcastExchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) + + +(1) Scan parquet +Output [2]: [c_custkey#X, c_mktsegment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [c_custkey#X, c_mktsegment#X] +Arguments: ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) + +(3) ProjectExecTransformer +Output [1]: [c_custkey#X] +Input [2]: [c_custkey#X, c_mktsegment#X] + +(4) WholeStageCodegenTransformer (X) +Input [1]: [c_custkey#X] +Arguments: false + +(5) ColumnarBroadcastExchange +Input [1]: [c_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(6) BroadcastQueryStage +Output [1]: [c_custkey#X] +Arguments: X + +(7) InputAdapter +Input [1]: [c_custkey#X] + +(8) InputIteratorTransformer +Input [1]: [c_custkey#X] + +(9) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(12) ProjectExecTransformer +Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] + +(13) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] +ReadSchema: struct + +(14) FilterExecTransformer +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) + +(15) ProjectExecTransformer +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(16) WholeStageCodegenTransformer (X) +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(17) ColumnarBroadcastExchange +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(18) BroadcastQueryStage +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(19) InputAdapter +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(20) InputIteratorTransformer +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(21) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(22) ProjectExecTransformer +Output [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(23) FlushableHashAggregateExecTransformer +Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] + +(24) ProjectExecTransformer +Output [6]: [hash(l_orderkey#X, o_orderdate#X, o_shippriority#X, 42) AS hash_partition_key#X, l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] + +(25) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Arguments: false + +(26) ColumnarExchange +Input [6]: [hash_partition_key#X, l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, o_orderdate#X, o_shippriority#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(27) ShuffleQueryStage +Output [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Arguments: X + +(28) InputAdapter +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] + +(29) InputIteratorTransformer +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] + +(30) RegularHashAggregateExecTransformer +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(31) ProjectExecTransformer +Output [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] +Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(32) WholeStageCodegenTransformer (X) +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: false + +(33) TakeOrderedAndProjectExecTransformer +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 + +(34) VeloxColumnarToRowExec +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] + +(35) Scan parquet +Output [2]: [c_custkey#X, c_mktsegment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] +ReadSchema: struct + +(36) Filter +Input [2]: [c_custkey#X, c_mktsegment#X] +Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) + +(37) Project +Output [1]: [c_custkey#X] +Input [2]: [c_custkey#X, c_mktsegment#X] + +(38) BroadcastExchange +Input [1]: [c_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(39) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(40) Filter +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(41) BroadcastHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(42) Project +Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] + +(43) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] +ReadSchema: struct + +(44) Filter +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) + +(45) Project +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(46) BroadcastExchange +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(47) BroadcastHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(48) Project +Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(49) HashAggregate +Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] + +(50) Exchange +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, o_orderdate#X, o_shippriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(51) HashAggregate +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] + +(52) TakeOrderedAndProject +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] + +(53) AdaptiveSparkPlan +Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/4.txt new file mode 100644 index 000000000000..5a9ae8b56e2f --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/4.txt @@ -0,0 +1,241 @@ +== Physical Plan == +AdaptiveSparkPlan (44) ++- == Final Plan == + VeloxColumnarToRowExec (29) + +- ^ SortExecTransformer (27) + +- ^ InputIteratorTransformer (26) + +- ^ InputAdapter (25) + +- ^ ShuffleQueryStage (24), Statistics(X) + +- ColumnarExchange (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ InputIteratorTransformer (20) + +- ^ InputAdapter (19) + +- ^ ShuffleQueryStage (18), Statistics(X) + +- ColumnarExchange (17) + +- ^ ProjectExecTransformer (15) + +- ^ FlushableHashAggregateExecTransformer (14) + +- ^ ProjectExecTransformer (13) + +- ^ GlutenBroadcastHashJoinExecTransformer LeftSemi (12) + :- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (11) + +- ^ InputAdapter (10) + +- ^ BroadcastQueryStage (9), Statistics(X) + +- ColumnarBroadcastExchange (8) + +- ^ ProjectExecTransformer (6) + +- ^ FilterExecTransformer (5) + +- ^ Scan parquet (4) ++- == Initial Plan == + Sort (43) + +- Exchange (42) + +- HashAggregate (41) + +- Exchange (40) + +- HashAggregate (39) + +- Project (38) + +- BroadcastHashJoin LeftSemi BuildRight (37) + :- Project (32) + : +- Filter (31) + : +- Scan parquet (30) + +- BroadcastExchange (36) + +- Project (35) + +- Filter (34) + +- Scan parquet (33) + + +(1) Scan parquet +Output [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-07-01), LessThan(o_orderdate,1993-10-01)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Arguments: ((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-07-01)) AND (o_orderdate#X < 1993-10-01)) + +(3) ProjectExecTransformer +Output [2]: [o_orderkey#X, o_orderpriority#X] +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] + +(4) Scan parquet +Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] +ReadSchema: struct + +(5) FilterExecTransformer +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Arguments: ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) + +(6) ProjectExecTransformer +Output [1]: [l_orderkey#X] +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] + +(7) WholeStageCodegenTransformer (X) +Input [1]: [l_orderkey#X] +Arguments: false + +(8) ColumnarBroadcastExchange +Input [1]: [l_orderkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(9) BroadcastQueryStage +Output [1]: [l_orderkey#X] +Arguments: X + +(10) InputAdapter +Input [1]: [l_orderkey#X] + +(11) InputIteratorTransformer +Input [1]: [l_orderkey#X] + +(12) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(13) ProjectExecTransformer +Output [1]: [o_orderpriority#X] +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(14) FlushableHashAggregateExecTransformer +Input [1]: [o_orderpriority#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [o_orderpriority#X, count#X] + +(15) ProjectExecTransformer +Output [3]: [hash(o_orderpriority#X, 42) AS hash_partition_key#X, o_orderpriority#X, count#X] +Input [2]: [o_orderpriority#X, count#X] + +(16) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderpriority#X, count#X] +Arguments: false + +(17) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderpriority#X, count#X] +Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [o_orderpriority#X, count#X], [plan_id=X], [id=#X] + +(18) ShuffleQueryStage +Output [2]: [o_orderpriority#X, count#X] +Arguments: X + +(19) InputAdapter +Input [2]: [o_orderpriority#X, count#X] + +(20) InputIteratorTransformer +Input [2]: [o_orderpriority#X, count#X] + +(21) RegularHashAggregateExecTransformer +Input [2]: [o_orderpriority#X, count#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] + +(22) WholeStageCodegenTransformer (X) +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: false + +(23) ColumnarExchange +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(24) ShuffleQueryStage +Output [2]: [o_orderpriority#X, order_count#X] +Arguments: X + +(25) InputAdapter +Input [2]: [o_orderpriority#X, order_count#X] + +(26) InputIteratorTransformer +Input [2]: [o_orderpriority#X, order_count#X] + +(27) SortExecTransformer +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 + +(28) WholeStageCodegenTransformer (X) +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: false + +(29) VeloxColumnarToRowExec +Input [2]: [o_orderpriority#X, order_count#X] + +(30) Scan parquet +Output [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-07-01), LessThan(o_orderdate,1993-10-01)] +ReadSchema: struct + +(31) Filter +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Condition : ((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-07-01)) AND (o_orderdate#X < 1993-10-01)) + +(32) Project +Output [2]: [o_orderkey#X, o_orderpriority#X] +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] + +(33) Scan parquet +Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] +ReadSchema: struct + +(34) Filter +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) + +(35) Project +Output [1]: [l_orderkey#X] +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] + +(36) BroadcastExchange +Input [1]: [l_orderkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(37) BroadcastHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(38) Project +Output [1]: [o_orderpriority#X] +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(39) HashAggregate +Input [1]: [o_orderpriority#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [o_orderpriority#X, count#X] + +(40) Exchange +Input [2]: [o_orderpriority#X, count#X] +Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(41) HashAggregate +Input [2]: [o_orderpriority#X, count#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] + +(42) Exchange +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(43) Sort +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 + +(44) AdaptiveSparkPlan +Output [2]: [o_orderpriority#X, order_count#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/5.txt new file mode 100644 index 000000000000..b32dac8cace5 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/5.txt @@ -0,0 +1,549 @@ +== Physical Plan == +AdaptiveSparkPlan (100) ++- == Final Plan == + VeloxColumnarToRowExec (65) + +- ^ SortExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ^ InputAdapter (61) + +- ^ ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- ^ RegularHashAggregateExecTransformer (57) + +- ^ InputIteratorTransformer (56) + +- ^ InputAdapter (55) + +- ^ ShuffleQueryStage (54), Statistics(X) + +- ColumnarExchange (53) + +- ^ ProjectExecTransformer (51) + +- ^ FlushableHashAggregateExecTransformer (50) + +- ^ ProjectExecTransformer (49) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (48) + :- ^ ProjectExecTransformer (39) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (38) + : :- ^ ProjectExecTransformer (30) + : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (29) + : : :- ^ ProjectExecTransformer (21) + : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (20) + : : : :- ^ ProjectExecTransformer (12) + : : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + : : : : :- ^ InputIteratorTransformer (7) + : : : : : +- ^ InputAdapter (6) + : : : : : +- ^ BroadcastQueryStage (5), Statistics(X) + : : : : : +- ColumnarBroadcastExchange (4) + : : : : : +- ^ FilterExecTransformer (2) + : : : : : +- ^ Scan parquet (1) + : : : : +- ^ ProjectExecTransformer (10) + : : : : +- ^ FilterExecTransformer (9) + : : : : +- ^ Scan parquet (8) + : : : +- ^ InputIteratorTransformer (19) + : : : +- ^ InputAdapter (18) + : : : +- ^ BroadcastQueryStage (17), Statistics(X) + : : : +- ColumnarBroadcastExchange (16) + : : : +- ^ FilterExecTransformer (14) + : : : +- ^ Scan parquet (13) + : : +- ^ InputIteratorTransformer (28) + : : +- ^ InputAdapter (27) + : : +- ^ BroadcastQueryStage (26), Statistics(X) + : : +- ColumnarBroadcastExchange (25) + : : +- ^ FilterExecTransformer (23) + : : +- ^ Scan parquet (22) + : +- ^ InputIteratorTransformer (37) + : +- ^ InputAdapter (36) + : +- ^ BroadcastQueryStage (35), Statistics(X) + : +- ColumnarBroadcastExchange (34) + : +- ^ FilterExecTransformer (32) + : +- ^ Scan parquet (31) + +- ^ InputIteratorTransformer (47) + +- ^ InputAdapter (46) + +- ^ BroadcastQueryStage (45), Statistics(X) + +- ColumnarBroadcastExchange (44) + +- ^ ProjectExecTransformer (42) + +- ^ FilterExecTransformer (41) + +- ^ Scan parquet (40) ++- == Initial Plan == + Sort (99) + +- Exchange (98) + +- HashAggregate (97) + +- Exchange (96) + +- HashAggregate (95) + +- Project (94) + +- BroadcastHashJoin Inner BuildRight (93) + :- Project (88) + : +- BroadcastHashJoin Inner BuildRight (87) + : :- Project (83) + : : +- BroadcastHashJoin Inner BuildRight (82) + : : :- Project (78) + : : : +- BroadcastHashJoin Inner BuildRight (77) + : : : :- Project (73) + : : : : +- BroadcastHashJoin Inner BuildLeft (72) + : : : : :- BroadcastExchange (68) + : : : : : +- Filter (67) + : : : : : +- Scan parquet (66) + : : : : +- Project (71) + : : : : +- Filter (70) + : : : : +- Scan parquet (69) + : : : +- BroadcastExchange (76) + : : : +- Filter (75) + : : : +- Scan parquet (74) + : : +- BroadcastExchange (81) + : : +- Filter (80) + : : +- Scan parquet (79) + : +- BroadcastExchange (86) + : +- Filter (85) + : +- Scan parquet (84) + +- BroadcastExchange (92) + +- Project (91) + +- Filter (90) + +- Scan parquet (89) + + +(1) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(3) WholeStageCodegenTransformer (X) +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: false + +(4) ColumnarBroadcastExchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(5) BroadcastQueryStage +Output [2]: [c_custkey#X, c_nationkey#X] +Arguments: X + +(6) InputAdapter +Input [2]: [c_custkey#X, c_nationkey#X] + +(7) InputIteratorTransformer +Input [2]: [c_custkey#X, c_nationkey#X] + +(8) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(9) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(10) ProjectExecTransformer +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(12) ProjectExecTransformer +Output [2]: [c_nationkey#X, o_orderkey#X] +Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] + +(13) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(14) FilterExecTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) + +(15) WholeStageCodegenTransformer (X) +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(16) ColumnarBroadcastExchange +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(17) BroadcastQueryStage +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(18) InputAdapter +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(19) InputIteratorTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(20) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(21) ProjectExecTransformer +Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(22) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(23) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(24) WholeStageCodegenTransformer (X) +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: false + +(25) ColumnarBroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] + +(26) BroadcastQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(27) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(28) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(29) GlutenBroadcastHashJoinExecTransformer +Left keys [2]: [l_suppkey#X, c_nationkey#X] +Right keys [2]: [s_suppkey#X, s_nationkey#X] +Join type: Inner +Join condition: None + +(30) ProjectExecTransformer +Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(31) Scan parquet +Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(32) FilterExecTransformer +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(33) WholeStageCodegenTransformer (X) +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: false + +(34) ColumnarBroadcastExchange +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(35) BroadcastQueryStage +Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: X + +(36) InputAdapter +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] + +(37) InputIteratorTransformer +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] + +(38) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(39) ProjectExecTransformer +Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] + +(40) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(41) FilterExecTransformer +Input [2]: [r_regionkey#X, r_name#X] +Arguments: ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) + +(42) ProjectExecTransformer +Output [1]: [r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(43) WholeStageCodegenTransformer (X) +Input [1]: [r_regionkey#X] +Arguments: false + +(44) ColumnarBroadcastExchange +Input [1]: [r_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(45) BroadcastQueryStage +Output [1]: [r_regionkey#X] +Arguments: X + +(46) InputAdapter +Input [1]: [r_regionkey#X] + +(47) InputIteratorTransformer +Input [1]: [r_regionkey#X] + +(48) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(49) ProjectExecTransformer +Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] + +(50) FlushableHashAggregateExecTransformer +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, _pre_X#X] +Keys [1]: [n_name#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [n_name#X, sum#X, isEmpty#X] + +(51) ProjectExecTransformer +Output [4]: [hash(n_name#X, 42) AS hash_partition_key#X, n_name#X, sum#X, isEmpty#X] +Input [3]: [n_name#X, sum#X, isEmpty#X] + +(52) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, n_name#X, sum#X, isEmpty#X] +Arguments: false + +(53) ColumnarExchange +Input [4]: [hash_partition_key#X, n_name#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [n_name#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(54) ShuffleQueryStage +Output [3]: [n_name#X, sum#X, isEmpty#X] +Arguments: X + +(55) InputAdapter +Input [3]: [n_name#X, sum#X, isEmpty#X] + +(56) InputIteratorTransformer +Input [3]: [n_name#X, sum#X, isEmpty#X] + +(57) RegularHashAggregateExecTransformer +Input [3]: [n_name#X, sum#X, isEmpty#X] +Keys [1]: [n_name#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(58) WholeStageCodegenTransformer (X) +Input [2]: [n_name#X, revenue#X] +Arguments: false + +(59) ColumnarExchange +Input [2]: [n_name#X, revenue#X] +Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(60) ShuffleQueryStage +Output [2]: [n_name#X, revenue#X] +Arguments: X + +(61) InputAdapter +Input [2]: [n_name#X, revenue#X] + +(62) InputIteratorTransformer +Input [2]: [n_name#X, revenue#X] + +(63) SortExecTransformer +Input [2]: [n_name#X, revenue#X] +Arguments: [revenue#X DESC NULLS LAST], true, 0 + +(64) WholeStageCodegenTransformer (X) +Input [2]: [n_name#X, revenue#X] +Arguments: false + +(65) VeloxColumnarToRowExec +Input [2]: [n_name#X, revenue#X] + +(66) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(67) Filter +Input [2]: [c_custkey#X, c_nationkey#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(68) BroadcastExchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(69) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(70) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(71) Project +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(72) BroadcastHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(73) Project +Output [2]: [c_nationkey#X, o_orderkey#X] +Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] + +(74) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(75) Filter +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) + +(76) BroadcastExchange +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(77) BroadcastHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(78) Project +Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(79) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(80) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(81) BroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] + +(82) BroadcastHashJoin +Left keys [2]: [l_suppkey#X, c_nationkey#X] +Right keys [2]: [s_suppkey#X, s_nationkey#X] +Join type: Inner +Join condition: None + +(83) Project +Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(84) Scan parquet +Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(85) Filter +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(86) BroadcastExchange +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(87) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(88) Project +Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] + +(89) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(90) Filter +Input [2]: [r_regionkey#X, r_name#X] +Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) + +(91) Project +Output [1]: [r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(92) BroadcastExchange +Input [1]: [r_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(93) BroadcastHashJoin +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(94) Project +Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] +Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] + +(95) HashAggregate +Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] +Keys [1]: [n_name#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [n_name#X, sum#X, isEmpty#X] + +(96) Exchange +Input [3]: [n_name#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(97) HashAggregate +Input [3]: [n_name#X, sum#X, isEmpty#X] +Keys [1]: [n_name#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(98) Exchange +Input [2]: [n_name#X, revenue#X] +Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(99) Sort +Input [2]: [n_name#X, revenue#X] +Arguments: [revenue#X DESC NULLS LAST], true, 0 + +(100) AdaptiveSparkPlan +Output [2]: [n_name#X, revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/6.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/6.txt new file mode 100644 index 000000000000..0882ff9e151c --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/6.txt @@ -0,0 +1,112 @@ +== Physical Plan == +AdaptiveSparkPlan (19) ++- == Final Plan == + VeloxColumnarToRowExec (12) + +- ^ RegularHashAggregateExecTransformer (10) + +- ^ InputIteratorTransformer (9) + +- ^ InputAdapter (8) + +- ^ ShuffleQueryStage (7), Statistics(X) + +- ColumnarExchange (6) + +- ^ FlushableHashAggregateExecTransformer (4) + +- ^ ProjectExecTransformer (3) + +- ^ FilterExecTransformer (2) + +- ^ Scan parquet (1) ++- == Initial Plan == + HashAggregate (18) + +- Exchange (17) + +- HashAggregate (16) + +- Project (15) + +- Filter (14) + +- Scan parquet (13) + + +(1) Scan parquet +Output [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), IsNotNull(l_discount), IsNotNull(l_quantity), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), GreaterThanOrEqual(l_discount,0.05), LessThanOrEqual(l_discount,0.07), LessThan(l_quantity,24.00)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: (((((((isnotnull(l_shipdate#X) AND isnotnull(l_discount#X)) AND isnotnull(l_quantity#X)) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND (l_discount#X >= 0.05)) AND (l_discount#X <= 0.07)) AND (l_quantity#X < 24.00)) + +(3) ProjectExecTransformer +Output [3]: [l_extendedprice#X, l_discount#X, (l_extendedprice#X * l_discount#X) AS _pre_X#X] +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(4) FlushableHashAggregateExecTransformer +Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] +Keys: [] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(5) WholeStageCodegenTransformer (X) +Input [2]: [sum#X, isEmpty#X] +Arguments: false + +(6) ColumnarExchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(7) ShuffleQueryStage +Output [2]: [sum#X, isEmpty#X] +Arguments: X + +(8) InputAdapter +Input [2]: [sum#X, isEmpty#X] + +(9) InputIteratorTransformer +Input [2]: [sum#X, isEmpty#X] + +(10) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * l_discount#X))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * l_discount#X))#X] +Results [1]: [sum((l_extendedprice#X * l_discount#X))#X AS revenue#X] + +(11) WholeStageCodegenTransformer (X) +Input [1]: [revenue#X] +Arguments: false + +(12) VeloxColumnarToRowExec +Input [1]: [revenue#X] + +(13) Scan parquet +Output [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), IsNotNull(l_discount), IsNotNull(l_quantity), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), GreaterThanOrEqual(l_discount,0.05), LessThanOrEqual(l_discount,0.07), LessThan(l_quantity,24.00)] +ReadSchema: struct + +(14) Filter +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : (((((((isnotnull(l_shipdate#X) AND isnotnull(l_discount#X)) AND isnotnull(l_quantity#X)) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND (l_discount#X >= 0.05)) AND (l_discount#X <= 0.07)) AND (l_quantity#X < 24.00)) + +(15) Project +Output [2]: [l_extendedprice#X, l_discount#X] +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(16) HashAggregate +Input [2]: [l_extendedprice#X, l_discount#X] +Keys: [] +Functions [1]: [partial_sum((l_extendedprice#X * l_discount#X))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(17) Exchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(18) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * l_discount#X))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * l_discount#X))#X] +Results [1]: [sum((l_extendedprice#X * l_discount#X))#X AS revenue#X] + +(19) AdaptiveSparkPlan +Output [1]: [revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/7.txt new file mode 100644 index 000000000000..ade9e522af4c --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/7.txt @@ -0,0 +1,511 @@ +== Physical Plan == +AdaptiveSparkPlan (93) ++- == Final Plan == + VeloxColumnarToRowExec (60) + +- ^ SortExecTransformer (58) + +- ^ InputIteratorTransformer (57) + +- ^ InputAdapter (56) + +- ^ ShuffleQueryStage (55), Statistics(X) + +- ColumnarExchange (54) + +- ^ RegularHashAggregateExecTransformer (52) + +- ^ InputIteratorTransformer (51) + +- ^ InputAdapter (50) + +- ^ ShuffleQueryStage (49), Statistics(X) + +- ColumnarExchange (48) + +- ^ ProjectExecTransformer (46) + +- ^ FlushableHashAggregateExecTransformer (45) + +- ^ ProjectExecTransformer (44) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (43) + :- ^ ProjectExecTransformer (38) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (37) + : :- ^ ProjectExecTransformer (29) + : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (28) + : : :- ^ ProjectExecTransformer (20) + : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (19) + : : : :- ^ ProjectExecTransformer (11) + : : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (10) + : : : : :- ^ InputIteratorTransformer (7) + : : : : : +- ^ InputAdapter (6) + : : : : : +- ^ BroadcastQueryStage (5), Statistics(X) + : : : : : +- ColumnarBroadcastExchange (4) + : : : : : +- ^ FilterExecTransformer (2) + : : : : : +- ^ Scan parquet (1) + : : : : +- ^ FilterExecTransformer (9) + : : : : +- ^ Scan parquet (8) + : : : +- ^ InputIteratorTransformer (18) + : : : +- ^ InputAdapter (17) + : : : +- ^ BroadcastQueryStage (16), Statistics(X) + : : : +- ColumnarBroadcastExchange (15) + : : : +- ^ FilterExecTransformer (13) + : : : +- ^ Scan parquet (12) + : : +- ^ InputIteratorTransformer (27) + : : +- ^ InputAdapter (26) + : : +- ^ BroadcastQueryStage (25), Statistics(X) + : : +- ColumnarBroadcastExchange (24) + : : +- ^ FilterExecTransformer (22) + : : +- ^ Scan parquet (21) + : +- ^ InputIteratorTransformer (36) + : +- ^ InputAdapter (35) + : +- ^ BroadcastQueryStage (34), Statistics(X) + : +- ColumnarBroadcastExchange (33) + : +- ^ FilterExecTransformer (31) + : +- ^ Scan parquet (30) + +- ^ InputIteratorTransformer (42) + +- ^ InputAdapter (41) + +- ^ BroadcastQueryStage (40), Statistics(X) + +- ReusedExchange (39) ++- == Initial Plan == + Sort (92) + +- Exchange (91) + +- HashAggregate (90) + +- Exchange (89) + +- HashAggregate (88) + +- Project (87) + +- BroadcastHashJoin Inner BuildRight (86) + :- Project (82) + : +- BroadcastHashJoin Inner BuildRight (81) + : :- Project (77) + : : +- BroadcastHashJoin Inner BuildRight (76) + : : :- Project (72) + : : : +- BroadcastHashJoin Inner BuildRight (71) + : : : :- Project (67) + : : : : +- BroadcastHashJoin Inner BuildLeft (66) + : : : : :- BroadcastExchange (63) + : : : : : +- Filter (62) + : : : : : +- Scan parquet (61) + : : : : +- Filter (65) + : : : : +- Scan parquet (64) + : : : +- BroadcastExchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- BroadcastExchange (75) + : : +- Filter (74) + : : +- Scan parquet (73) + : +- BroadcastExchange (80) + : +- Filter (79) + : +- Scan parquet (78) + +- BroadcastExchange (85) + +- Filter (84) + +- Scan parquet (83) + + +(1) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(3) WholeStageCodegenTransformer (X) +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: false + +(4) ColumnarBroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(5) BroadcastQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(6) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(7) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(8) Scan parquet +Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(9) FilterExecTransformer +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(10) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(11) ProjectExecTransformer +Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(12) Scan parquet +Output [2]: [o_orderkey#X, o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(13) FilterExecTransformer +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) + +(14) WholeStageCodegenTransformer (X) +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: false + +(15) ColumnarBroadcastExchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(16) BroadcastQueryStage +Output [2]: [o_orderkey#X, o_custkey#X] +Arguments: X + +(17) InputAdapter +Input [2]: [o_orderkey#X, o_custkey#X] + +(18) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_custkey#X] + +(19) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(20) ProjectExecTransformer +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] + +(21) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(22) FilterExecTransformer +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(23) WholeStageCodegenTransformer (X) +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: false + +(24) ColumnarBroadcastExchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(25) BroadcastQueryStage +Output [2]: [c_custkey#X, c_nationkey#X] +Arguments: X + +(26) InputAdapter +Input [2]: [c_custkey#X, c_nationkey#X] + +(27) InputIteratorTransformer +Input [2]: [c_custkey#X, c_nationkey#X] + +(28) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(29) ProjectExecTransformer +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] + +(30) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] +ReadSchema: struct + +(31) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) + +(32) WholeStageCodegenTransformer (X) +Input [2]: [n_nationkey#X, n_name#X] +Arguments: false + +(33) ColumnarBroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(34) BroadcastQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(35) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(36) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(37) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(38) ProjectExecTransformer +Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] + +(39) ReusedExchange [Reuses operator id: 33] +Output [2]: [n_nationkey#X, n_name#X] + +(40) BroadcastQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(41) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(42) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(43) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) + +(44) ProjectExecTransformer +Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] +Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] + +(45) FlushableHashAggregateExecTransformer +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [partial_sum(volume#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(46) ProjectExecTransformer +Output [6]: [hash(supp_nation#X, cust_nation#X, l_year#X, 42) AS hash_partition_key#X, supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(47) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: false + +(48) ColumnarExchange +Input [6]: [hash_partition_key#X, supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(49) ShuffleQueryStage +Output [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: X + +(50) InputAdapter +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(51) InputIteratorTransformer +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(52) RegularHashAggregateExecTransformer +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [sum(volume#X)] +Aggregate Attributes [1]: [sum(volume#X)#X] +Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] + +(53) WholeStageCodegenTransformer (X) +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: false + +(54) ColumnarExchange +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(55) ShuffleQueryStage +Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: X + +(56) InputAdapter +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] + +(57) InputIteratorTransformer +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] + +(58) SortExecTransformer +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 + +(59) WholeStageCodegenTransformer (X) +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: false + +(60) VeloxColumnarToRowExec +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] + +(61) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(62) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(63) BroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(64) Scan parquet +Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(65) Filter +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(66) BroadcastHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(67) Project +Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(68) Scan parquet +Output [2]: [o_orderkey#X, o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(69) Filter +Input [2]: [o_orderkey#X, o_custkey#X] +Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) + +(70) BroadcastExchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(71) BroadcastHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(72) Project +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] + +(73) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(74) Filter +Input [2]: [c_custkey#X, c_nationkey#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(75) BroadcastExchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(76) BroadcastHashJoin +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(77) Project +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] + +(78) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] +ReadSchema: struct + +(79) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) + +(80) BroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(81) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(82) Project +Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] + +(83) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] +ReadSchema: struct + +(84) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) + +(85) BroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(86) BroadcastHashJoin +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) + +(87) Project +Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] +Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] + +(88) HashAggregate +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [partial_sum(volume#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(89) Exchange +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(90) HashAggregate +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [sum(volume#X)] +Aggregate Attributes [1]: [sum(volume#X)#X] +Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] + +(91) Exchange +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(92) Sort +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 + +(93) AdaptiveSparkPlan +Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/8.txt new file mode 100644 index 000000000000..3825a80802f3 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/8.txt @@ -0,0 +1,708 @@ +== Physical Plan == +AdaptiveSparkPlan (129) ++- == Final Plan == + VeloxColumnarToRowExec (84) + +- ^ SortExecTransformer (82) + +- ^ InputIteratorTransformer (81) + +- ^ InputAdapter (80) + +- ^ ShuffleQueryStage (79), Statistics(X) + +- ColumnarExchange (78) + +- ^ ProjectExecTransformer (76) + +- ^ RegularHashAggregateExecTransformer (75) + +- ^ InputIteratorTransformer (74) + +- ^ InputAdapter (73) + +- ^ ShuffleQueryStage (72), Statistics(X) + +- ColumnarExchange (71) + +- ^ ProjectExecTransformer (69) + +- ^ FlushableHashAggregateExecTransformer (68) + +- ^ ProjectExecTransformer (67) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (66) + :- ^ ProjectExecTransformer (57) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (56) + : :- ^ ProjectExecTransformer (48) + : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (47) + : : :- ^ ProjectExecTransformer (39) + : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (38) + : : : :- ^ ProjectExecTransformer (30) + : : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (29) + : : : : :- ^ ProjectExecTransformer (21) + : : : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (20) + : : : : : :- ^ ProjectExecTransformer (12) + : : : : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + : : : : : : :- ^ InputIteratorTransformer (8) + : : : : : : : +- ^ InputAdapter (7) + : : : : : : : +- ^ BroadcastQueryStage (6), Statistics(X) + : : : : : : : +- ColumnarBroadcastExchange (5) + : : : : : : : +- ^ ProjectExecTransformer (3) + : : : : : : : +- ^ FilterExecTransformer (2) + : : : : : : : +- ^ Scan parquet (1) + : : : : : : +- ^ FilterExecTransformer (10) + : : : : : : +- ^ Scan parquet (9) + : : : : : +- ^ InputIteratorTransformer (19) + : : : : : +- ^ InputAdapter (18) + : : : : : +- ^ BroadcastQueryStage (17), Statistics(X) + : : : : : +- ColumnarBroadcastExchange (16) + : : : : : +- ^ FilterExecTransformer (14) + : : : : : +- ^ Scan parquet (13) + : : : : +- ^ InputIteratorTransformer (28) + : : : : +- ^ InputAdapter (27) + : : : : +- ^ BroadcastQueryStage (26), Statistics(X) + : : : : +- ColumnarBroadcastExchange (25) + : : : : +- ^ FilterExecTransformer (23) + : : : : +- ^ Scan parquet (22) + : : : +- ^ InputIteratorTransformer (37) + : : : +- ^ InputAdapter (36) + : : : +- ^ BroadcastQueryStage (35), Statistics(X) + : : : +- ColumnarBroadcastExchange (34) + : : : +- ^ FilterExecTransformer (32) + : : : +- ^ Scan parquet (31) + : : +- ^ InputIteratorTransformer (46) + : : +- ^ InputAdapter (45) + : : +- ^ BroadcastQueryStage (44), Statistics(X) + : : +- ColumnarBroadcastExchange (43) + : : +- ^ FilterExecTransformer (41) + : : +- ^ Scan parquet (40) + : +- ^ InputIteratorTransformer (55) + : +- ^ InputAdapter (54) + : +- ^ BroadcastQueryStage (53), Statistics(X) + : +- ColumnarBroadcastExchange (52) + : +- ^ FilterExecTransformer (50) + : +- ^ Scan parquet (49) + +- ^ InputIteratorTransformer (65) + +- ^ InputAdapter (64) + +- ^ BroadcastQueryStage (63), Statistics(X) + +- ColumnarBroadcastExchange (62) + +- ^ ProjectExecTransformer (60) + +- ^ FilterExecTransformer (59) + +- ^ Scan parquet (58) ++- == Initial Plan == + Sort (128) + +- Exchange (127) + +- HashAggregate (126) + +- Exchange (125) + +- HashAggregate (124) + +- Project (123) + +- BroadcastHashJoin Inner BuildRight (122) + :- Project (117) + : +- BroadcastHashJoin Inner BuildRight (116) + : :- Project (112) + : : +- BroadcastHashJoin Inner BuildRight (111) + : : :- Project (107) + : : : +- BroadcastHashJoin Inner BuildRight (106) + : : : :- Project (102) + : : : : +- BroadcastHashJoin Inner BuildRight (101) + : : : : :- Project (97) + : : : : : +- BroadcastHashJoin Inner BuildRight (96) + : : : : : :- Project (92) + : : : : : : +- BroadcastHashJoin Inner BuildLeft (91) + : : : : : : :- BroadcastExchange (88) + : : : : : : : +- Project (87) + : : : : : : : +- Filter (86) + : : : : : : : +- Scan parquet (85) + : : : : : : +- Filter (90) + : : : : : : +- Scan parquet (89) + : : : : : +- BroadcastExchange (95) + : : : : : +- Filter (94) + : : : : : +- Scan parquet (93) + : : : : +- BroadcastExchange (100) + : : : : +- Filter (99) + : : : : +- Scan parquet (98) + : : : +- BroadcastExchange (105) + : : : +- Filter (104) + : : : +- Scan parquet (103) + : : +- BroadcastExchange (110) + : : +- Filter (109) + : : +- Scan parquet (108) + : +- BroadcastExchange (115) + : +- Filter (114) + : +- Scan parquet (113) + +- BroadcastExchange (121) + +- Project (120) + +- Filter (119) + +- Scan parquet (118) + + +(1) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_type), EqualTo(p_type,ECONOMY ANODIZED STEEL), IsNotNull(p_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [p_partkey#X, p_type#X] +Arguments: ((isnotnull(p_type#X) AND (p_type#X = ECONOMY ANODIZED STEEL)) AND isnotnull(p_partkey#X)) + +(3) ProjectExecTransformer +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_type#X] + +(4) WholeStageCodegenTransformer (X) +Input [1]: [p_partkey#X] +Arguments: false + +(5) ColumnarBroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(6) BroadcastQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(7) InputAdapter +Input [1]: [p_partkey#X] + +(8) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(9) Scan parquet +Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(12) ProjectExecTransformer +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(13) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(14) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(15) WholeStageCodegenTransformer (X) +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: false + +(16) ColumnarBroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(17) BroadcastQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(18) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(19) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(20) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(21) ProjectExecTransformer +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(22) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(23) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) + +(24) WholeStageCodegenTransformer (X) +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: false + +(25) ColumnarBroadcastExchange +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(26) BroadcastQueryStage +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: X + +(27) InputAdapter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(28) InputIteratorTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(29) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(30) ProjectExecTransformer +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] + +(31) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(32) FilterExecTransformer +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(33) WholeStageCodegenTransformer (X) +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: false + +(34) ColumnarBroadcastExchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(35) BroadcastQueryStage +Output [2]: [c_custkey#X, c_nationkey#X] +Arguments: X + +(36) InputAdapter +Input [2]: [c_custkey#X, c_nationkey#X] + +(37) InputIteratorTransformer +Input [2]: [c_custkey#X, c_nationkey#X] + +(38) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(39) ProjectExecTransformer +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] + +(40) Scan parquet +Output [2]: [n_nationkey#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(41) FilterExecTransformer +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(42) WholeStageCodegenTransformer (X) +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: false + +(43) ColumnarBroadcastExchange +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(44) BroadcastQueryStage +Output [2]: [n_nationkey#X, n_regionkey#X] +Arguments: X + +(45) InputAdapter +Input [2]: [n_nationkey#X, n_regionkey#X] + +(46) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_regionkey#X] + +(47) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] + +(49) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(50) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: isnotnull(n_nationkey#X) + +(51) WholeStageCodegenTransformer (X) +Input [2]: [n_nationkey#X, n_name#X] +Arguments: false + +(52) ColumnarBroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(53) BroadcastQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(54) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(55) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(56) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(57) ProjectExecTransformer +Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] + +(58) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(59) FilterExecTransformer +Input [2]: [r_regionkey#X, r_name#X] +Arguments: ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) + +(60) ProjectExecTransformer +Output [1]: [r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(61) WholeStageCodegenTransformer (X) +Input [1]: [r_regionkey#X] +Arguments: false + +(62) ColumnarBroadcastExchange +Input [1]: [r_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(63) BroadcastQueryStage +Output [1]: [r_regionkey#X] +Arguments: X + +(64) InputAdapter +Input [1]: [r_regionkey#X] + +(65) InputIteratorTransformer +Input [1]: [r_regionkey#X] + +(66) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(67) ProjectExecTransformer +Output [4]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X, CASE WHEN (n_name#X = BRAZIL) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END AS _pre_X#X] +Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] + +(68) FlushableHashAggregateExecTransformer +Input [4]: [o_year#X, volume#X, nation#X, _pre_X#X] +Keys [1]: [o_year#X] +Functions [2]: [partial_sum(_pre_X#X), partial_sum(volume#X)] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(69) ProjectExecTransformer +Output [6]: [hash(o_year#X, 42) AS hash_partition_key#X, o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(70) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: false + +(71) ColumnarExchange +Input [6]: [hash_partition_key#X, o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(72) ShuffleQueryStage +Output [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: X + +(73) InputAdapter +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(74) InputIteratorTransformer +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(75) RegularHashAggregateExecTransformer +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys [1]: [o_year#X] +Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] +Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] +Results [3]: [o_year#X, sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] + +(76) ProjectExecTransformer +Output [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] +Input [3]: [o_year#X, sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] + +(77) WholeStageCodegenTransformer (X) +Input [2]: [o_year#X, mkt_share#X] +Arguments: false + +(78) ColumnarExchange +Input [2]: [o_year#X, mkt_share#X] +Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(79) ShuffleQueryStage +Output [2]: [o_year#X, mkt_share#X] +Arguments: X + +(80) InputAdapter +Input [2]: [o_year#X, mkt_share#X] + +(81) InputIteratorTransformer +Input [2]: [o_year#X, mkt_share#X] + +(82) SortExecTransformer +Input [2]: [o_year#X, mkt_share#X] +Arguments: [o_year#X ASC NULLS FIRST], true, 0 + +(83) WholeStageCodegenTransformer (X) +Input [2]: [o_year#X, mkt_share#X] +Arguments: false + +(84) VeloxColumnarToRowExec +Input [2]: [o_year#X, mkt_share#X] + +(85) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_type), EqualTo(p_type,ECONOMY ANODIZED STEEL), IsNotNull(p_partkey)] +ReadSchema: struct + +(86) Filter +Input [2]: [p_partkey#X, p_type#X] +Condition : ((isnotnull(p_type#X) AND (p_type#X = ECONOMY ANODIZED STEEL)) AND isnotnull(p_partkey#X)) + +(87) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_type#X] + +(88) BroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(89) Scan parquet +Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(90) Filter +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(91) BroadcastHashJoin +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(92) Project +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(93) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(94) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(95) BroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(96) BroadcastHashJoin +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(97) Project +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(98) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(99) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) + +(100) BroadcastExchange +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(101) BroadcastHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(102) Project +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] + +(103) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(104) Filter +Input [2]: [c_custkey#X, c_nationkey#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(105) BroadcastExchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(106) BroadcastHashJoin +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(107) Project +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] + +(108) Scan parquet +Output [2]: [n_nationkey#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(109) Filter +Input [2]: [n_nationkey#X, n_regionkey#X] +Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(110) BroadcastExchange +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(111) BroadcastHashJoin +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(112) Project +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] + +(113) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(114) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : isnotnull(n_nationkey#X) + +(115) BroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(116) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(117) Project +Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] + +(118) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(119) Filter +Input [2]: [r_regionkey#X, r_name#X] +Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) + +(120) Project +Output [1]: [r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(121) BroadcastExchange +Input [1]: [r_regionkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(122) BroadcastHashJoin +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(123) Project +Output [3]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X] +Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] + +(124) HashAggregate +Input [3]: [o_year#X, volume#X, nation#X] +Keys [1]: [o_year#X] +Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(125) Exchange +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(126) HashAggregate +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys [1]: [o_year#X] +Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] +Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] +Results [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] + +(127) Exchange +Input [2]: [o_year#X, mkt_share#X] +Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(128) Sort +Input [2]: [o_year#X, mkt_share#X] +Arguments: [o_year#X ASC NULLS FIRST], true, 0 + +(129) AdaptiveSparkPlan +Output [2]: [o_year#X, mkt_share#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/9.txt new file mode 100644 index 000000000000..4fcbb237e4e6 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/9.txt @@ -0,0 +1,539 @@ +== Physical Plan == +AdaptiveSparkPlan (98) ++- == Final Plan == + VeloxColumnarToRowExec (64) + +- ^ SortExecTransformer (62) + +- ^ InputIteratorTransformer (61) + +- ^ InputAdapter (60) + +- ^ ShuffleQueryStage (59), Statistics(X) + +- ColumnarExchange (58) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ InputIteratorTransformer (55) + +- ^ InputAdapter (54) + +- ^ ShuffleQueryStage (53), Statistics(X) + +- ColumnarExchange (52) + +- ^ ProjectExecTransformer (50) + +- ^ FlushableHashAggregateExecTransformer (49) + +- ^ ProjectExecTransformer (48) + +- ^ GlutenBroadcastHashJoinExecTransformer Inner (47) + :- ^ ProjectExecTransformer (39) + : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (38) + : :- ^ ProjectExecTransformer (30) + : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (29) + : : :- ^ ProjectExecTransformer (21) + : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (20) + : : : :- ^ ProjectExecTransformer (12) + : : : : +- ^ GlutenBroadcastHashJoinExecTransformer Inner (11) + : : : : :- ^ InputIteratorTransformer (8) + : : : : : +- ^ InputAdapter (7) + : : : : : +- ^ BroadcastQueryStage (6), Statistics(X) + : : : : : +- ColumnarBroadcastExchange (5) + : : : : : +- ^ ProjectExecTransformer (3) + : : : : : +- ^ FilterExecTransformer (2) + : : : : : +- ^ Scan parquet (1) + : : : : +- ^ FilterExecTransformer (10) + : : : : +- ^ Scan parquet (9) + : : : +- ^ InputIteratorTransformer (19) + : : : +- ^ InputAdapter (18) + : : : +- ^ BroadcastQueryStage (17), Statistics(X) + : : : +- ColumnarBroadcastExchange (16) + : : : +- ^ FilterExecTransformer (14) + : : : +- ^ Scan parquet (13) + : : +- ^ InputIteratorTransformer (28) + : : +- ^ InputAdapter (27) + : : +- ^ BroadcastQueryStage (26), Statistics(X) + : : +- ColumnarBroadcastExchange (25) + : : +- ^ FilterExecTransformer (23) + : : +- ^ Scan parquet (22) + : +- ^ InputIteratorTransformer (37) + : +- ^ InputAdapter (36) + : +- ^ BroadcastQueryStage (35), Statistics(X) + : +- ColumnarBroadcastExchange (34) + : +- ^ FilterExecTransformer (32) + : +- ^ Scan parquet (31) + +- ^ InputIteratorTransformer (46) + +- ^ InputAdapter (45) + +- ^ BroadcastQueryStage (44), Statistics(X) + +- ColumnarBroadcastExchange (43) + +- ^ FilterExecTransformer (41) + +- ^ Scan parquet (40) ++- == Initial Plan == + Sort (97) + +- Exchange (96) + +- HashAggregate (95) + +- Exchange (94) + +- HashAggregate (93) + +- Project (92) + +- BroadcastHashJoin Inner BuildRight (91) + :- Project (87) + : +- BroadcastHashJoin Inner BuildRight (86) + : :- Project (82) + : : +- BroadcastHashJoin Inner BuildRight (81) + : : :- Project (77) + : : : +- BroadcastHashJoin Inner BuildRight (76) + : : : :- Project (72) + : : : : +- BroadcastHashJoin Inner BuildLeft (71) + : : : : :- BroadcastExchange (68) + : : : : : +- Project (67) + : : : : : +- Filter (66) + : : : : : +- Scan parquet (65) + : : : : +- Filter (70) + : : : : +- Scan parquet (69) + : : : +- BroadcastExchange (75) + : : : +- Filter (74) + : : : +- Scan parquet (73) + : : +- BroadcastExchange (80) + : : +- Filter (79) + : : +- Scan parquet (78) + : +- BroadcastExchange (85) + : +- Filter (84) + : +- Scan parquet (83) + +- BroadcastExchange (90) + +- Filter (89) + +- Scan parquet (88) + + +(1) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringContains(p_name,green), IsNotNull(p_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [p_partkey#X, p_name#X] +Arguments: ((isnotnull(p_name#X) AND Contains(p_name#X, green)) AND isnotnull(p_partkey#X)) + +(3) ProjectExecTransformer +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(4) WholeStageCodegenTransformer (X) +Input [1]: [p_partkey#X] +Arguments: false + +(5) ColumnarBroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(6) BroadcastQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(7) InputAdapter +Input [1]: [p_partkey#X] + +(8) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(9) Scan parquet +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(11) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(12) ProjectExecTransformer +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(13) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(14) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(15) WholeStageCodegenTransformer (X) +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: false + +(16) ColumnarBroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(17) BroadcastQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(18) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(19) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(20) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(21) ProjectExecTransformer +Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(22) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] +ReadSchema: struct + +(23) FilterExecTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) + +(24) WholeStageCodegenTransformer (X) +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: false + +(25) ColumnarBroadcastExchange +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [plan_id=X] + +(26) BroadcastQueryStage +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: X + +(27) InputAdapter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(28) InputIteratorTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(29) GlutenBroadcastHashJoinExecTransformer +Left keys [2]: [l_suppkey#X, l_partkey#X] +Right keys [2]: [ps_suppkey#X, ps_partkey#X] +Join type: Inner +Join condition: None + +(30) ProjectExecTransformer +Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(31) Scan parquet +Output [2]: [o_orderkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(32) FilterExecTransformer +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: isnotnull(o_orderkey#X) + +(33) WholeStageCodegenTransformer (X) +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: false + +(34) ColumnarBroadcastExchange +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(35) BroadcastQueryStage +Output [2]: [o_orderkey#X, o_orderdate#X] +Arguments: X + +(36) InputAdapter +Input [2]: [o_orderkey#X, o_orderdate#X] + +(37) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_orderdate#X] + +(38) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(39) ProjectExecTransformer +Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] + +(40) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(41) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: isnotnull(n_nationkey#X) + +(42) WholeStageCodegenTransformer (X) +Input [2]: [n_nationkey#X, n_name#X] +Arguments: false + +(43) ColumnarBroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(44) BroadcastQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(45) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(46) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(47) GlutenBroadcastHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] + +(49) FlushableHashAggregateExecTransformer +Input [3]: [nation#X, o_year#X, amount#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [partial_sum(amount#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(50) ProjectExecTransformer +Output [5]: [hash(nation#X, o_year#X, 42) AS hash_partition_key#X, nation#X, o_year#X, sum#X, isEmpty#X] +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(51) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: false + +(52) ColumnarExchange +Input [5]: [hash_partition_key#X, nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [nation#X, o_year#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(53) ShuffleQueryStage +Output [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: X + +(54) InputAdapter +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(55) InputIteratorTransformer +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(56) RegularHashAggregateExecTransformer +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [sum(amount#X)] +Aggregate Attributes [1]: [sum(amount#X)#X] +Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] + +(57) WholeStageCodegenTransformer (X) +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: false + +(58) ColumnarExchange +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(59) ShuffleQueryStage +Output [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: X + +(60) InputAdapter +Input [3]: [nation#X, o_year#X, sum_profit#X] + +(61) InputIteratorTransformer +Input [3]: [nation#X, o_year#X, sum_profit#X] + +(62) SortExecTransformer +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 + +(63) WholeStageCodegenTransformer (X) +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: false + +(64) VeloxColumnarToRowExec +Input [3]: [nation#X, o_year#X, sum_profit#X] + +(65) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringContains(p_name,green), IsNotNull(p_partkey)] +ReadSchema: struct + +(66) Filter +Input [2]: [p_partkey#X, p_name#X] +Condition : ((isnotnull(p_name#X) AND Contains(p_name#X, green)) AND isnotnull(p_partkey#X)) + +(67) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(68) BroadcastExchange +Input [1]: [p_partkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] + +(69) Scan parquet +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(70) Filter +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(71) BroadcastHashJoin +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(72) Project +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(73) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(74) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(75) BroadcastExchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(76) BroadcastHashJoin +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(77) Project +Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(78) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] +ReadSchema: struct + +(79) Filter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) + +(80) BroadcastExchange +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [plan_id=X] + +(81) BroadcastHashJoin +Left keys [2]: [l_suppkey#X, l_partkey#X] +Right keys [2]: [ps_suppkey#X, ps_partkey#X] +Join type: Inner +Join condition: None + +(82) Project +Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(83) Scan parquet +Output [2]: [o_orderkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(84) Filter +Input [2]: [o_orderkey#X, o_orderdate#X] +Condition : isnotnull(o_orderkey#X) + +(85) BroadcastExchange +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(86) BroadcastHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(87) Project +Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] + +(88) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(89) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : isnotnull(n_nationkey#X) + +(90) BroadcastExchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=X] + +(91) BroadcastHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(92) Project +Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] + +(93) HashAggregate +Input [3]: [nation#X, o_year#X, amount#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [partial_sum(amount#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(94) Exchange +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(95) HashAggregate +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [sum(amount#X)] +Aggregate Attributes [1]: [sum(amount#X)#X] +Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] + +(96) Exchange +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(97) Sort +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 + +(98) AdaptiveSparkPlan +Output [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/1.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/1.txt new file mode 100644 index 000000000000..634f26c86f24 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/1.txt @@ -0,0 +1,154 @@ +== Physical Plan == +AdaptiveSparkPlan (28) ++- == Final Plan == + VeloxColumnarToRowExec (19) + +- ^ SortExecTransformer (17) + +- ^ InputIteratorTransformer (16) + +- ^ InputAdapter (15) + +- ^ ShuffleQueryStage (14), Statistics(X) + +- ColumnarExchange (13) + +- ^ RegularHashAggregateExecTransformer (11) + +- ^ InputIteratorTransformer (10) + +- ^ InputAdapter (9) + +- ^ ShuffleQueryStage (8), Statistics(X) + +- ColumnarExchange (7) + +- ^ ProjectExecTransformer (5) + +- ^ FlushableHashAggregateExecTransformer (4) + +- ^ ProjectExecTransformer (3) + +- ^ FilterExecTransformer (2) + +- ^ Scan parquet (1) ++- == Initial Plan == + Sort (27) + +- Exchange (26) + +- HashAggregate (25) + +- Exchange (24) + +- HashAggregate (23) + +- Project (22) + +- Filter (21) + +- Scan parquet (20) + + +(1) Scan parquet +Output [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), LessThanOrEqual(l_shipdate,1998-09-02)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Arguments: (isnotnull(l_shipdate#X) AND (l_shipdate#X <= 1998-09-02)) + +(3) ProjectExecTransformer +Output [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X, ((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)) AS _pre_X#X] +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] + +(4) FlushableHashAggregateExecTransformer +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, _pre_X#X, _pre_X#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [partial_sum(l_quantity#X), partial_sum(l_extendedprice#X), partial_sum(_pre_X#X), partial_sum(_pre_X#X), partial_avg(l_quantity#X), partial_avg(l_extendedprice#X), partial_avg(l_discount#X), partial_count(1)] +Aggregate Attributes [15]: [sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Results [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(5) ProjectExecTransformer +Output [18]: [hash(l_returnflag#X, l_linestatus#X, 42) AS hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(6) WholeStageCodegenTransformer (X) +Input [18]: [hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: false + +(7) ColumnarExchange +Input [18]: [hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: hashpartitioning(l_returnflag#X, l_linestatus#X, 1), ENSURE_REQUIREMENTS, [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X], [plan_id=X], [id=#X] + +(8) ShuffleQueryStage +Output [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: X + +(9) InputAdapter +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(10) InputIteratorTransformer +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(11) RegularHashAggregateExecTransformer +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [sum(l_quantity#X), sum(l_extendedprice#X), sum((l_extendedprice#X * (1 - l_discount#X))), sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), avg(l_quantity#X), avg(l_extendedprice#X), avg(l_discount#X), count(1)] +Aggregate Attributes [8]: [sum(l_quantity#X)#X, sum(l_extendedprice#X)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X, avg(l_quantity#X)#X, avg(l_extendedprice#X)#X, avg(l_discount#X)#X, count(1)#X] +Results [10]: [l_returnflag#X, l_linestatus#X, sum(l_quantity#X)#X AS sum_qty#X, sum(l_extendedprice#X)#X AS sum_base_price#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS sum_disc_price#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X AS sum_charge#X, avg(l_quantity#X)#X AS avg_qty#X, avg(l_extendedprice#X)#X AS avg_price#X, avg(l_discount#X)#X AS avg_disc#X, count(1)#X AS count_order#X] + +(12) WholeStageCodegenTransformer (X) +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: false + +(13) ColumnarExchange +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: rangepartitioning(l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: X + +(15) InputAdapter +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] + +(16) InputIteratorTransformer +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] + +(17) SortExecTransformer +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: [l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST], true, 0 + +(18) WholeStageCodegenTransformer (X) +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: false + +(19) VeloxColumnarToRowExec +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] + +(20) Scan parquet +Output [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), LessThanOrEqual(l_shipdate,1998-09-02)] +ReadSchema: struct + +(21) Filter +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] +Condition : (isnotnull(l_shipdate#X) AND (l_shipdate#X <= 1998-09-02)) + +(22) Project +Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X] +Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X] + +(23) HashAggregate +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [partial_sum(l_quantity#X), partial_sum(l_extendedprice#X), partial_sum((l_extendedprice#X * (1 - l_discount#X))), partial_sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), partial_avg(l_quantity#X), partial_avg(l_extendedprice#X), partial_avg(l_discount#X), partial_count(1)] +Aggregate Attributes [15]: [sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Results [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] + +(24) Exchange +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Arguments: hashpartitioning(l_returnflag#X, l_linestatus#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(25) HashAggregate +Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X] +Keys [2]: [l_returnflag#X, l_linestatus#X] +Functions [8]: [sum(l_quantity#X), sum(l_extendedprice#X), sum((l_extendedprice#X * (1 - l_discount#X))), sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), avg(l_quantity#X), avg(l_extendedprice#X), avg(l_discount#X), count(1)] +Aggregate Attributes [8]: [sum(l_quantity#X)#X, sum(l_extendedprice#X)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X, avg(l_quantity#X)#X, avg(l_extendedprice#X)#X, avg(l_discount#X)#X, count(1)#X] +Results [10]: [l_returnflag#X, l_linestatus#X, sum(l_quantity#X)#X AS sum_qty#X, sum(l_extendedprice#X)#X AS sum_base_price#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS sum_disc_price#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X AS sum_charge#X, avg(l_quantity#X)#X AS avg_qty#X, avg(l_extendedprice#X)#X AS avg_price#X, avg(l_discount#X)#X AS avg_disc#X, count(1)#X AS count_order#X] + +(26) Exchange +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: rangepartitioning(l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(27) Sort +Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: [l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST], true, 0 + +(28) AdaptiveSparkPlan +Output [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt new file mode 100644 index 000000000000..873710be9a02 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt @@ -0,0 +1,464 @@ +== Physical Plan == +AdaptiveSparkPlan (87) ++- == Final Plan == + VeloxColumnarToRowExec (60) + +- TakeOrderedAndProjectExecTransformer (59) + +- ^ ProjectExecTransformer (57) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ InputIteratorTransformer (55) + +- ^ InputAdapter (54) + +- ^ ShuffleQueryStage (53), Statistics(X) + +- ColumnarExchange (52) + +- ^ ProjectExecTransformer (50) + +- ^ FlushableHashAggregateExecTransformer (49) + +- ^ ProjectExecTransformer (48) + +- ^ ShuffledHashJoinExecTransformer Inner (47) + :- ^ InputIteratorTransformer (38) + : +- ^ InputAdapter (37) + : +- ^ ShuffleQueryStage (36), Statistics(X) + : +- ColumnarExchange (35) + : +- ^ ProjectExecTransformer (33) + : +- ^ ShuffledHashJoinExecTransformer Inner (32) + : :- ^ InputIteratorTransformer (23) + : : +- ^ InputAdapter (22) + : : +- ^ ShuffleQueryStage (21), Statistics(X) + : : +- ColumnarExchange (20) + : : +- ^ ProjectExecTransformer (18) + : : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : : :- ^ InputIteratorTransformer (8) + : : : +- ^ InputAdapter (7) + : : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : : +- ColumnarExchange (5) + : : : +- ^ ProjectExecTransformer (3) + : : : +- ^ FilterExecTransformer (2) + : : : +- ^ Scan parquet (1) + : : +- ^ InputIteratorTransformer (16) + : : +- ^ InputAdapter (15) + : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : +- ColumnarExchange (13) + : : +- ^ ProjectExecTransformer (11) + : : +- ^ FilterExecTransformer (10) + : : +- ^ Scan parquet (9) + : +- ^ InputIteratorTransformer (31) + : +- ^ InputAdapter (30) + : +- ^ ShuffleQueryStage (29), Statistics(X) + : +- ColumnarExchange (28) + : +- ^ ProjectExecTransformer (26) + : +- ^ FilterExecTransformer (25) + : +- ^ Scan parquet (24) + +- ^ InputIteratorTransformer (46) + +- ^ InputAdapter (45) + +- ^ ShuffleQueryStage (44), Statistics(X) + +- ColumnarExchange (43) + +- ^ ProjectExecTransformer (41) + +- ^ FilterExecTransformer (40) + +- ^ Scan parquet (39) ++- == Initial Plan == + TakeOrderedAndProject (86) + +- HashAggregate (85) + +- Exchange (84) + +- HashAggregate (83) + +- Project (82) + +- ShuffledHashJoin Inner BuildRight (81) + :- Exchange (77) + : +- Project (76) + : +- ShuffledHashJoin Inner BuildRight (75) + : :- Exchange (70) + : : +- Project (69) + : : +- ShuffledHashJoin Inner BuildRight (68) + : : :- Exchange (63) + : : : +- Filter (62) + : : : +- Scan parquet (61) + : : +- Exchange (67) + : : +- Project (66) + : : +- Filter (65) + : : +- Scan parquet (64) + : +- Exchange (74) + : +- Project (73) + : +- Filter (72) + : +- Scan parquet (71) + +- Exchange (80) + +- Filter (79) + +- Scan parquet (78) + + +(1) Scan parquet +Output [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(3) ProjectExecTransformer +Output [8]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] + +(4) WholeStageCodegenTransformer (X) +Input [8]: [hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: false + +(5) ColumnarExchange +Input [8]: [hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: X + +(7) InputAdapter +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] + +(8) InputIteratorTransformer +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] + +(9) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(11) ProjectExecTransformer +Output [3]: [hash(o_custkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(12) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: false + +(13) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [2]: [o_orderkey#X, o_custkey#X] +Arguments: X + +(15) InputAdapter +Input [2]: [o_orderkey#X, o_custkey#X] + +(16) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_custkey#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [9]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] + +(19) WholeStageCodegenTransformer (X) +Input [9]: [hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: false + +(20) ColumnarExchange +Input [9]: [hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: X + +(22) InputAdapter +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] + +(23) InputIteratorTransformer +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] + +(24) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] +ReadSchema: struct + +(25) FilterExecTransformer +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Arguments: ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) + +(26) ProjectExecTransformer +Output [4]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] + +(27) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(28) ColumnarExchange +Input [4]: [hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(30) InputAdapter +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(31) InputIteratorTransformer +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(32) ShuffledHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(33) ProjectExecTransformer +Output [10]: [hash(c_nationkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(34) WholeStageCodegenTransformer (X) +Input [10]: [hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(35) ColumnarExchange +Input [10]: [hash_partition_key#X, c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(36) ShuffleQueryStage +Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(37) InputAdapter +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] + +(38) InputIteratorTransformer +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] + +(39) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(40) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: isnotnull(n_nationkey#X) + +(41) ProjectExecTransformer +Output [3]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X, n_name#X] +Input [2]: [n_nationkey#X, n_name#X] + +(42) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: false + +(43) ColumnarExchange +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X, n_name#X], [plan_id=X], [id=#X] + +(44) ShuffleQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(45) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(46) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(47) ShuffledHashJoinExecTransformer +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [10]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] + +(49) FlushableHashAggregateExecTransformer +Input [10]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X, _pre_X#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(50) ProjectExecTransformer +Output [10]: [hash(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 42) AS hash_partition_key#X, c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(51) WholeStageCodegenTransformer (X) +Input [10]: [hash_partition_key#X, c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: false + +(52) ColumnarExchange +Input [10]: [hash_partition_key#X, c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(53) ShuffleQueryStage +Output [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: X + +(54) InputAdapter +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(55) InputIteratorTransformer +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(56) RegularHashAggregateExecTransformer +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [8]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(57) ProjectExecTransformer +Output [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Input [8]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(58) WholeStageCodegenTransformer (X) +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: false + +(59) TakeOrderedAndProjectExecTransformer +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X], 0 + +(60) VeloxColumnarToRowExec +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] + +(61) Scan parquet +Output [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(62) Filter +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(63) Exchange +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(64) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(65) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(66) Project +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(67) Exchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(68) ShuffledHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(69) Project +Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] + +(70) Exchange +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(71) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] +ReadSchema: struct + +(72) Filter +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] +Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) + +(73) Project +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] + +(74) Exchange +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(75) ShuffledHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(76) Project +Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(77) Exchange +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(78) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(79) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : isnotnull(n_nationkey#X) + +(80) Exchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(81) ShuffledHashJoin +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(82) Project +Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] + +(83) HashAggregate +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] + +(84) Exchange +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(85) HashAggregate +Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] +Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] + +(86) TakeOrderedAndProject +Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] + +(87) AdaptiveSparkPlan +Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt new file mode 100644 index 000000000000..bfe059a11c11 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt @@ -0,0 +1,647 @@ +== Physical Plan == +AdaptiveSparkPlan (72) ++- == Final Plan == + VeloxColumnarToRowExec (50) + +- ^ SortExecTransformer (48) + +- ^ InputIteratorTransformer (47) + +- ^ InputAdapter (46) + +- ^ ShuffleQueryStage (45), Statistics(X) + +- ColumnarExchange (44) + +- ^ FilterExecTransformer (42) + +- ^ RegularHashAggregateExecTransformer (41) + +- ^ InputIteratorTransformer (40) + +- ^ InputAdapter (39) + +- ^ ShuffleQueryStage (38), Statistics(X) + +- ColumnarExchange (37) + +- ^ ProjectExecTransformer (35) + +- ^ FlushableHashAggregateExecTransformer (34) + +- ^ ProjectExecTransformer (33) + +- ^ ShuffledHashJoinExecTransformer Inner (32) + :- ^ InputIteratorTransformer (23) + : +- ^ InputAdapter (22) + : +- ^ ShuffleQueryStage (21), Statistics(X) + : +- ColumnarExchange (20) + : +- ^ ProjectExecTransformer (18) + : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : :- ^ InputIteratorTransformer (8) + : : +- ^ InputAdapter (7) + : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : +- ColumnarExchange (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (16) + : +- ^ InputAdapter (15) + : +- ^ ShuffleQueryStage (14), Statistics(X) + : +- ColumnarExchange (13) + : +- ^ ProjectExecTransformer (11) + : +- ^ FilterExecTransformer (10) + : +- ^ Scan parquet (9) + +- ^ InputIteratorTransformer (31) + +- ^ InputAdapter (30) + +- ^ ShuffleQueryStage (29), Statistics(X) + +- ColumnarExchange (28) + +- ^ ProjectExecTransformer (26) + +- ^ FilterExecTransformer (25) + +- ^ Scan parquet (24) ++- == Initial Plan == + Sort (71) + +- Exchange (70) + +- Filter (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- Project (65) + +- ShuffledHashJoin Inner BuildRight (64) + :- Exchange (59) + : +- Project (58) + : +- ShuffledHashJoin Inner BuildRight (57) + : :- Exchange (53) + : : +- Filter (52) + : : +- Scan parquet (51) + : +- Exchange (56) + : +- Filter (55) + : +- Scan parquet (54) + +- Exchange (63) + +- Project (62) + +- Filter (61) + +- Scan parquet (60) + + +(1) Scan parquet +Output [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: isnotnull(ps_suppkey#X) + +(3) ProjectExecTransformer +Output [5]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] + +(4) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: false + +(5) ColumnarExchange +Input [5]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: X + +(7) InputAdapter +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] + +(8) InputIteratorTransformer +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] + +(9) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(11) ProjectExecTransformer +Output [3]: [hash(s_suppkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Input [2]: [s_suppkey#X, s_nationkey#X] + +(12) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: false + +(13) ColumnarExchange +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_nationkey#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(15) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(16) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [5]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(19) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: false + +(20) ColumnarExchange +Input [5]: [hash_partition_key#X, ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: X + +(22) InputAdapter +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] + +(23) InputIteratorTransformer +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] + +(24) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] +ReadSchema: struct + +(25) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) + +(26) ProjectExecTransformer +Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(27) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, n_nationkey#X] +Arguments: false + +(28) ColumnarExchange +Input [2]: [hash_partition_key#X, n_nationkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(30) InputAdapter +Input [1]: [n_nationkey#X] + +(31) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(32) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(33) ProjectExecTransformer +Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] +Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(34) FlushableHashAggregateExecTransformer +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, _pre_X#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(35) ProjectExecTransformer +Output [4]: [hash(ps_partkey#X, 42) AS hash_partition_key#X, ps_partkey#X, sum#X, isEmpty#X] +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(36) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, ps_partkey#X, sum#X, isEmpty#X] +Arguments: false + +(37) ColumnarExchange +Input [4]: [hash_partition_key#X, ps_partkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(38) ShuffleQueryStage +Output [3]: [ps_partkey#X, sum#X, isEmpty#X] +Arguments: X + +(39) InputAdapter +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(40) InputIteratorTransformer +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(41) RegularHashAggregateExecTransformer +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] + +(42) FilterExecTransformer +Input [2]: [ps_partkey#X, value#X] +Arguments: (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) + +(43) WholeStageCodegenTransformer (X) +Input [2]: [ps_partkey#X, value#X] +Arguments: false + +(44) ColumnarExchange +Input [2]: [ps_partkey#X, value#X] +Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(45) ShuffleQueryStage +Output [2]: [ps_partkey#X, value#X] +Arguments: X + +(46) InputAdapter +Input [2]: [ps_partkey#X, value#X] + +(47) InputIteratorTransformer +Input [2]: [ps_partkey#X, value#X] + +(48) SortExecTransformer +Input [2]: [ps_partkey#X, value#X] +Arguments: [value#X DESC NULLS LAST], true, 0 + +(49) WholeStageCodegenTransformer (X) +Input [2]: [ps_partkey#X, value#X] +Arguments: false + +(50) VeloxColumnarToRowExec +Input [2]: [ps_partkey#X, value#X] + +(51) Scan parquet +Output [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(52) Filter +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Condition : isnotnull(ps_suppkey#X) + +(53) Exchange +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(54) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(55) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(56) Exchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(57) ShuffledHashJoin +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(58) Project +Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(59) Exchange +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(60) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] +ReadSchema: struct + +(61) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) + +(62) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(63) Exchange +Input [1]: [n_nationkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(64) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(65) Project +Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] +Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(66) HashAggregate +Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [ps_partkey#X, sum#X, isEmpty#X] + +(67) Exchange +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(68) HashAggregate +Input [3]: [ps_partkey#X, sum#X, isEmpty#X] +Keys [1]: [ps_partkey#X] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] + +(69) Filter +Input [2]: [ps_partkey#X, value#X] +Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) + +(70) Exchange +Input [2]: [ps_partkey#X, value#X] +Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(71) Sort +Input [2]: [ps_partkey#X, value#X] +Arguments: [value#X DESC NULLS LAST], true, 0 + +(72) AdaptiveSparkPlan +Output [2]: [ps_partkey#X, value#X] +Arguments: isFinalPlan=true + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (120) ++- == Final Plan == + VeloxColumnarToRowExec (102) + +- ^ ProjectExecTransformer (100) + +- ^ RegularHashAggregateExecTransformer (99) + +- ^ RegularHashAggregateExecTransformer (98) + +- ^ ProjectExecTransformer (97) + +- ^ ShuffledHashJoinExecTransformer Inner (96) + :- ^ InputIteratorTransformer (91) + : +- ^ InputAdapter (90) + : +- ^ ShuffleQueryStage (89), Statistics(X) + : +- ColumnarExchange (88) + : +- ^ ProjectExecTransformer (86) + : +- ^ ShuffledHashJoinExecTransformer Inner (85) + : :- ^ InputIteratorTransformer (80) + : : +- ^ InputAdapter (79) + : : +- ^ ShuffleQueryStage (78), Statistics(X) + : : +- ColumnarExchange (77) + : : +- ^ ProjectExecTransformer (75) + : : +- ^ FilterExecTransformer (74) + : : +- ^ Scan parquet (73) + : +- ^ InputIteratorTransformer (84) + : +- ^ InputAdapter (83) + : +- ^ ShuffleQueryStage (82), Statistics(X) + : +- ReusedExchange (81) + +- ^ InputIteratorTransformer (95) + +- ^ InputAdapter (94) + +- ^ ShuffleQueryStage (93), Statistics(X) + +- ReusedExchange (92) ++- == Initial Plan == + HashAggregate (119) + +- HashAggregate (118) + +- Project (117) + +- ShuffledHashJoin Inner BuildRight (116) + :- Exchange (111) + : +- Project (110) + : +- ShuffledHashJoin Inner BuildRight (109) + : :- Exchange (105) + : : +- Filter (104) + : : +- Scan parquet (103) + : +- Exchange (108) + : +- Filter (107) + : +- Scan parquet (106) + +- Exchange (115) + +- Project (114) + +- Filter (113) + +- Scan parquet (112) + + +(73) Scan parquet +Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(74) FilterExecTransformer +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: isnotnull(ps_suppkey#X) + +(75) ProjectExecTransformer +Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] + +(76) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: false + +(77) ColumnarExchange +Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] + +(78) ShuffleQueryStage +Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: X + +(79) InputAdapter +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] + +(80) InputIteratorTransformer +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] + +(81) ReusedExchange [Reuses operator id: 13] +Output [2]: [s_suppkey#X, s_nationkey#X] + +(82) ShuffleQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(83) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(84) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(85) ShuffledHashJoinExecTransformer +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(86) ProjectExecTransformer +Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(87) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: false + +(88) ColumnarExchange +Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] + +(89) ShuffleQueryStage +Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: X + +(90) InputAdapter +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] + +(91) InputIteratorTransformer +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] + +(92) ReusedExchange [Reuses operator id: 28] +Output [1]: [n_nationkey#X] + +(93) ShuffleQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(94) InputAdapter +Input [1]: [n_nationkey#X] + +(95) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(96) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(97) ProjectExecTransformer +Output [3]: [ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] +Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(98) RegularHashAggregateExecTransformer +Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] +Keys: [] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(99) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] + +(100) ProjectExecTransformer +Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] +Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] + +(101) WholeStageCodegenTransformer (X) +Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] +Arguments: false + +(102) VeloxColumnarToRowExec +Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] + +(103) Scan parquet +Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey)] +ReadSchema: struct + +(104) Filter +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Condition : isnotnull(ps_suppkey#X) + +(105) Exchange +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(106) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(107) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(108) Exchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(109) ShuffledHashJoin +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(110) Project +Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] + +(111) Exchange +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(112) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] +ReadSchema: struct + +(113) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) + +(114) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(115) Exchange +Input [1]: [n_nationkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(116) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(117) Project +Output [2]: [ps_availqty#X, ps_supplycost#X] +Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] + +(118) HashAggregate +Input [2]: [ps_availqty#X, ps_supplycost#X] +Keys: [] +Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(119) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] +Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] + +(120) AdaptiveSparkPlan +Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt new file mode 100644 index 000000000000..85c06695e147 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt @@ -0,0 +1,263 @@ +== Physical Plan == +AdaptiveSparkPlan (49) ++- == Final Plan == + VeloxColumnarToRowExec (34) + +- ^ SortExecTransformer (32) + +- ^ InputIteratorTransformer (31) + +- ^ InputAdapter (30) + +- ^ ShuffleQueryStage (29), Statistics(X) + +- ColumnarExchange (28) + +- ^ RegularHashAggregateExecTransformer (26) + +- ^ InputIteratorTransformer (25) + +- ^ InputAdapter (24) + +- ^ ShuffleQueryStage (23), Statistics(X) + +- ColumnarExchange (22) + +- ^ ProjectExecTransformer (20) + +- ^ FlushableHashAggregateExecTransformer (19) + +- ^ ProjectExecTransformer (18) + +- ^ ShuffledHashJoinExecTransformer Inner (17) + :- ^ InputIteratorTransformer (8) + : +- ^ InputAdapter (7) + : +- ^ ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (16) + +- ^ InputAdapter (15) + +- ^ ShuffleQueryStage (14), Statistics(X) + +- ColumnarExchange (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) ++- == Initial Plan == + Sort (48) + +- Exchange (47) + +- HashAggregate (46) + +- Exchange (45) + +- HashAggregate (44) + +- Project (43) + +- ShuffledHashJoin Inner BuildLeft (42) + :- Exchange (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Exchange (41) + +- Project (40) + +- Filter (39) + +- Scan parquet (38) + + +(1) Scan parquet +Output [2]: [o_orderkey#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: isnotnull(o_orderkey#X) + +(3) ProjectExecTransformer +Output [3]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_orderpriority#X] +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(4) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderkey#X, o_orderpriority#X] +Arguments: false + +(5) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderkey#X, o_orderpriority#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_orderpriority#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: X + +(7) InputAdapter +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(8) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(9) Scan parquet +Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Arguments: ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) + +(11) ProjectExecTransformer +Output [3]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_shipmode#X] +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] + +(12) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, l_orderkey#X, l_shipmode#X] +Arguments: false + +(13) ColumnarExchange +Input [3]: [hash_partition_key#X, l_orderkey#X, l_shipmode#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_shipmode#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [2]: [l_orderkey#X, l_shipmode#X] +Arguments: X + +(15) InputAdapter +Input [2]: [l_orderkey#X, l_shipmode#X] + +(16) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_shipmode#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [4]: [o_orderpriority#X, l_shipmode#X, CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END AS _pre_X#X, CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END AS _pre_X#X] +Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] + +(19) FlushableHashAggregateExecTransformer +Input [4]: [o_orderpriority#X, l_shipmode#X, _pre_X#X, _pre_X#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, sum#X] +Results [3]: [l_shipmode#X, sum#X, sum#X] + +(20) ProjectExecTransformer +Output [4]: [hash(l_shipmode#X, 42) AS hash_partition_key#X, l_shipmode#X, sum#X, sum#X] +Input [3]: [l_shipmode#X, sum#X, sum#X] + +(21) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_shipmode#X, sum#X, sum#X] +Arguments: false + +(22) ColumnarExchange +Input [4]: [hash_partition_key#X, l_shipmode#X, sum#X, sum#X] +Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [l_shipmode#X, sum#X, sum#X], [plan_id=X], [id=#X] + +(23) ShuffleQueryStage +Output [3]: [l_shipmode#X, sum#X, sum#X] +Arguments: X + +(24) InputAdapter +Input [3]: [l_shipmode#X, sum#X, sum#X] + +(25) InputIteratorTransformer +Input [3]: [l_shipmode#X, sum#X, sum#X] + +(26) RegularHashAggregateExecTransformer +Input [3]: [l_shipmode#X, sum#X, sum#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] +Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] + +(27) WholeStageCodegenTransformer (X) +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: false + +(28) ColumnarExchange +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: X + +(30) InputAdapter +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] + +(31) InputIteratorTransformer +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] + +(32) SortExecTransformer +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 + +(33) WholeStageCodegenTransformer (X) +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: false + +(34) VeloxColumnarToRowExec +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] + +(35) Scan parquet +Output [2]: [o_orderkey#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(36) Filter +Input [2]: [o_orderkey#X, o_orderpriority#X] +Condition : isnotnull(o_orderkey#X) + +(37) Exchange +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(38) Scan parquet +Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] +ReadSchema: struct + +(39) Filter +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] +Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) + +(40) Project +Output [2]: [l_orderkey#X, l_shipmode#X] +Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] + +(41) Exchange +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(42) ShuffledHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(43) Project +Output [2]: [o_orderpriority#X, l_shipmode#X] +Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] + +(44) HashAggregate +Input [2]: [o_orderpriority#X, l_shipmode#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] +Aggregate Attributes [2]: [sum#X, sum#X] +Results [3]: [l_shipmode#X, sum#X, sum#X] + +(45) Exchange +Input [3]: [l_shipmode#X, sum#X, sum#X] +Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(46) HashAggregate +Input [3]: [l_shipmode#X, sum#X, sum#X] +Keys [1]: [l_shipmode#X] +Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] +Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] + +(47) Exchange +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(48) Sort +Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 + +(49) AdaptiveSparkPlan +Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt new file mode 100644 index 000000000000..c3526c707e4b --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt @@ -0,0 +1,288 @@ +== Physical Plan == +AdaptiveSparkPlan (52) ++- == Final Plan == + VeloxColumnarToRowExec (36) + +- ^ SortExecTransformer (34) + +- ^ InputIteratorTransformer (33) + +- ^ InputAdapter (32) + +- ^ ShuffleQueryStage (31), Statistics(X) + +- ColumnarExchange (30) + +- ^ RegularHashAggregateExecTransformer (28) + +- ^ InputIteratorTransformer (27) + +- ^ InputAdapter (26) + +- ^ ShuffleQueryStage (25), Statistics(X) + +- ColumnarExchange (24) + +- ^ ProjectExecTransformer (22) + +- ^ FlushableHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ RegularHashAggregateExecTransformer (18) + +- ^ ProjectExecTransformer (17) + +- ^ ShuffledHashJoinExecTransformer LeftOuter (16) + :- ^ InputIteratorTransformer (7) + : +- ^ InputAdapter (6) + : +- ^ ShuffleQueryStage (5), Statistics(X) + : +- ColumnarExchange (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (15) + +- ^ InputAdapter (14) + +- ^ ShuffleQueryStage (13), Statistics(X) + +- ColumnarExchange (12) + +- ^ ProjectExecTransformer (10) + +- ^ FilterExecTransformer (9) + +- ^ Scan parquet (8) ++- == Initial Plan == + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- HashAggregate (46) + +- HashAggregate (45) + +- Project (44) + +- ShuffledHashJoin LeftOuter BuildRight (43) + :- Exchange (38) + : +- Scan parquet (37) + +- Exchange (42) + +- Project (41) + +- Filter (40) + +- Scan parquet (39) + + +(1) Scan parquet +Output [1]: [c_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(2) ProjectExecTransformer +Output [2]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X] +Input [1]: [c_custkey#X] + +(3) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, c_custkey#X] +Arguments: false + +(4) ColumnarExchange +Input [2]: [hash_partition_key#X, c_custkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X], [plan_id=X], [id=#X] + +(5) ShuffleQueryStage +Output [1]: [c_custkey#X] +Arguments: X + +(6) InputAdapter +Input [1]: [c_custkey#X] + +(7) InputIteratorTransformer +Input [1]: [c_custkey#X] + +(8) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] +ReadSchema: struct + +(9) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Arguments: ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) + +(10) ProjectExecTransformer +Output [3]: [hash(o_custkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] + +(11) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: false + +(12) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X], [plan_id=X], [id=#X] + +(13) ShuffleQueryStage +Output [2]: [o_orderkey#X, o_custkey#X] +Arguments: X + +(14) InputAdapter +Input [2]: [o_orderkey#X, o_custkey#X] + +(15) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_custkey#X] + +(16) ShuffledHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftOuter +Join condition: None + +(17) ProjectExecTransformer +Output [2]: [c_custkey#X, o_orderkey#X] +Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] + +(18) RegularHashAggregateExecTransformer +Input [2]: [c_custkey#X, o_orderkey#X] +Keys [1]: [c_custkey#X] +Functions [1]: [partial_count(o_orderkey#X)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_custkey#X, count#X] + +(19) RegularHashAggregateExecTransformer +Input [2]: [c_custkey#X, count#X] +Keys [1]: [c_custkey#X] +Functions [1]: [count(o_orderkey#X)] +Aggregate Attributes [1]: [count(o_orderkey#X)#X] +Results [2]: [c_custkey#X, count(o_orderkey#X)#X] + +(20) ProjectExecTransformer +Output [1]: [count(o_orderkey#X)#X AS c_count#X] +Input [2]: [c_custkey#X, count(o_orderkey#X)#X] + +(21) FlushableHashAggregateExecTransformer +Input [1]: [c_count#X] +Keys [1]: [c_count#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_count#X, count#X] + +(22) ProjectExecTransformer +Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] +Input [2]: [c_count#X, count#X] + +(23) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_count#X, count#X] +Arguments: false + +(24) ColumnarExchange +Input [3]: [hash_partition_key#X, c_count#X, count#X] +Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [id=#X] + +(25) ShuffleQueryStage +Output [2]: [c_count#X, count#X] +Arguments: X + +(26) InputAdapter +Input [2]: [c_count#X, count#X] + +(27) InputIteratorTransformer +Input [2]: [c_count#X, count#X] + +(28) RegularHashAggregateExecTransformer +Input [2]: [c_count#X, count#X] +Keys [1]: [c_count#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [c_count#X, count(1)#X AS custdist#X] + +(29) WholeStageCodegenTransformer (X) +Input [2]: [c_count#X, custdist#X] +Arguments: false + +(30) ColumnarExchange +Input [2]: [c_count#X, custdist#X] +Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(31) ShuffleQueryStage +Output [2]: [c_count#X, custdist#X] +Arguments: X + +(32) InputAdapter +Input [2]: [c_count#X, custdist#X] + +(33) InputIteratorTransformer +Input [2]: [c_count#X, custdist#X] + +(34) SortExecTransformer +Input [2]: [c_count#X, custdist#X] +Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 + +(35) WholeStageCodegenTransformer (X) +Input [2]: [c_count#X, custdist#X] +Arguments: false + +(36) VeloxColumnarToRowExec +Input [2]: [c_count#X, custdist#X] + +(37) Scan parquet +Output [1]: [c_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(38) Exchange +Input [1]: [c_custkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(39) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] +ReadSchema: struct + +(40) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] +Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) + +(41) Project +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] + +(42) Exchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(43) ShuffledHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftOuter +Join condition: None + +(44) Project +Output [2]: [c_custkey#X, o_orderkey#X] +Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] + +(45) HashAggregate +Input [2]: [c_custkey#X, o_orderkey#X] +Keys [1]: [c_custkey#X] +Functions [1]: [partial_count(o_orderkey#X)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_custkey#X, count#X] + +(46) HashAggregate +Input [2]: [c_custkey#X, count#X] +Keys [1]: [c_custkey#X] +Functions [1]: [count(o_orderkey#X)] +Aggregate Attributes [1]: [count(o_orderkey#X)#X] +Results [1]: [count(o_orderkey#X)#X AS c_count#X] + +(47) HashAggregate +Input [1]: [c_count#X] +Keys [1]: [c_count#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [c_count#X, count#X] + +(48) Exchange +Input [2]: [c_count#X, count#X] +Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(49) HashAggregate +Input [2]: [c_count#X, count#X] +Keys [1]: [c_count#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [c_count#X, count(1)#X AS custdist#X] + +(50) Exchange +Input [2]: [c_count#X, custdist#X] +Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(51) Sort +Input [2]: [c_count#X, custdist#X] +Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 + +(52) AdaptiveSparkPlan +Output [2]: [c_count#X, custdist#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt new file mode 100644 index 000000000000..fd8974a5b6cb --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt @@ -0,0 +1,199 @@ +== Physical Plan == +AdaptiveSparkPlan (35) ++- == Final Plan == + VeloxColumnarToRowExec (23) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ ProjectExecTransformer (18) + +- ^ ShuffledHashJoinExecTransformer Inner (17) + :- ^ InputIteratorTransformer (8) + : +- ^ InputAdapter (7) + : +- ^ ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (16) + +- ^ InputAdapter (15) + +- ^ ShuffleQueryStage (14), Statistics(X) + +- ColumnarExchange (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) ++- == Initial Plan == + HashAggregate (34) + +- HashAggregate (33) + +- Project (32) + +- ShuffledHashJoin Inner BuildRight (31) + :- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Exchange (30) + +- Filter (29) + +- Scan parquet (28) + + +(1) Scan parquet +Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) + +(3) ProjectExecTransformer +Output [4]: [hash(l_partkey#X, 42) AS hash_partition_key#X, l_partkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(4) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(5) ColumnarExchange +Input [4]: [hash_partition_key#X, l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_partkey#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(7) InputAdapter +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] + +(8) InputIteratorTransformer +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] + +(9) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_partkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [2]: [p_partkey#X, p_type#X] +Arguments: isnotnull(p_partkey#X) + +(11) ProjectExecTransformer +Output [3]: [hash(p_partkey#X, 42) AS hash_partition_key#X, p_partkey#X, p_type#X] +Input [2]: [p_partkey#X, p_type#X] + +(12) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, p_partkey#X, p_type#X] +Arguments: false + +(13) ColumnarExchange +Input [3]: [hash_partition_key#X, p_partkey#X, p_type#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [p_partkey#X, p_type#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [2]: [p_partkey#X, p_type#X] +Arguments: X + +(15) InputAdapter +Input [2]: [p_partkey#X, p_type#X] + +(16) InputIteratorTransformer +Input [2]: [p_partkey#X, p_type#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [5]: [l_extendedprice#X, l_discount#X, p_type#X, CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END AS _pre_X#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] + +(19) RegularHashAggregateExecTransformer +Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] +Keys: [] +Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] + +(20) RegularHashAggregateExecTransformer +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys: [] +Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(21) ProjectExecTransformer +Output [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] +Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(22) WholeStageCodegenTransformer (X) +Input [1]: [promo_revenue#X] +Arguments: false + +(23) VeloxColumnarToRowExec +Input [1]: [promo_revenue#X] + +(24) Scan parquet +Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] +ReadSchema: struct + +(25) Filter +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) + +(26) Project +Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(27) Exchange +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(28) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_partkey)] +ReadSchema: struct + +(29) Filter +Input [2]: [p_partkey#X, p_type#X] +Condition : isnotnull(p_partkey#X) + +(30) Exchange +Input [2]: [p_partkey#X, p_type#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(31) ShuffledHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(32) Project +Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] +Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] + +(33) HashAggregate +Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] +Keys: [] +Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] + +(34) HashAggregate +Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys: [] +Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] + +(35) AdaptiveSparkPlan +Output [1]: [promo_revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt new file mode 100644 index 000000000000..8fd855a59c42 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt @@ -0,0 +1,391 @@ +== Physical Plan == +AdaptiveSparkPlan (42) ++- == Final Plan == + VeloxColumnarToRowExec (27) + +- AQEShuffleRead (26) + +- ShuffleQueryStage (25), Statistics(X) + +- ColumnarExchange (24) + +- ^ ProjectExecTransformer (22) + +- ^ ShuffledHashJoinExecTransformer Inner (21) + :- ^ InputIteratorTransformer (8) + : +- ^ InputAdapter (7) + : +- ^ ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ FilterExecTransformer (20) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ InputIteratorTransformer (18) + +- ^ InputAdapter (17) + +- ^ ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- ^ ProjectExecTransformer (13) + +- ^ FlushableHashAggregateExecTransformer (12) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) ++- == Initial Plan == + Sort (41) + +- Exchange (40) + +- Project (39) + +- ShuffledHashJoin Inner BuildLeft (38) + :- Exchange (30) + : +- Filter (29) + : +- Scan parquet (28) + +- Filter (37) + +- HashAggregate (36) + +- Exchange (35) + +- HashAggregate (34) + +- Project (33) + +- Filter (32) + +- Scan parquet (31) + + +(1) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: isnotnull(s_suppkey#X) + +(3) ProjectExecTransformer +Output [5]: [hash(s_suppkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] + +(4) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: false + +(5) ColumnarExchange +Input [5]: [hash_partition_key#X, s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_name#X, s_address#X, s_phone#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: X + +(7) InputAdapter +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] + +(8) InputIteratorTransformer +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] + +(9) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) + +(11) ProjectExecTransformer +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(12) FlushableHashAggregateExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(13) ProjectExecTransformer +Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(14) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: false + +(15) ColumnarExchange +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(16) ShuffleQueryStage +Output [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: X + +(17) InputAdapter +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(18) InputIteratorTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(19) RegularHashAggregateExecTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] + +(20) FilterExecTransformer +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) + +(21) ShuffledHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [supplier_no#X] +Join type: Inner +Join condition: None + +(22) ProjectExecTransformer +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] + +(23) WholeStageCodegenTransformer (X) +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: false + +(24) ColumnarExchange +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(25) ShuffleQueryStage +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: X + +(26) AQEShuffleRead +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: local + +(27) VeloxColumnarToRowExec +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] + +(28) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey)] +ReadSchema: struct + +(29) Filter +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Condition : isnotnull(s_suppkey#X) + +(30) Exchange +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(31) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] +ReadSchema: struct + +(32) Filter +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) + +(33) Project +Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(34) HashAggregate +Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(35) Exchange +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(36) HashAggregate +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] + +(37) Filter +Input [2]: [supplier_no#X, total_revenue#X] +Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) + +(38) ShuffledHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [supplier_no#X] +Join type: Inner +Join condition: None + +(39) Project +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] + +(40) Exchange +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(41) Sort +Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 + +(42) AdaptiveSparkPlan +Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] +Arguments: isFinalPlan=true + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (67) ++- == Final Plan == + VeloxColumnarToRowExec (58) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ ProjectExecTransformer (54) + +- ^ RegularHashAggregateExecTransformer (53) + +- ^ InputIteratorTransformer (52) + +- ^ InputAdapter (51) + +- ^ ShuffleQueryStage (50), Statistics(X) + +- ColumnarExchange (49) + +- ^ ProjectExecTransformer (47) + +- ^ FlushableHashAggregateExecTransformer (46) + +- ^ ProjectExecTransformer (45) + +- ^ FilterExecTransformer (44) + +- ^ Scan parquet (43) ++- == Initial Plan == + HashAggregate (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Exchange (63) + +- HashAggregate (62) + +- Project (61) + +- Filter (60) + +- Scan parquet (59) + + +(43) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] +ReadSchema: struct + +(44) FilterExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) + +(45) ProjectExecTransformer +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(46) FlushableHashAggregateExecTransformer +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(47) ProjectExecTransformer +Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(48) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: false + +(49) ColumnarExchange +Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(50) ShuffleQueryStage +Output [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: X + +(51) InputAdapter +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(52) InputIteratorTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(53) RegularHashAggregateExecTransformer +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(54) ProjectExecTransformer +Output [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] +Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(55) RegularHashAggregateExecTransformer +Input [1]: [total_revenue#X] +Keys: [] +Functions [1]: [partial_max(total_revenue#X)] +Aggregate Attributes [1]: [max#X] +Results [1]: [max#X] + +(56) RegularHashAggregateExecTransformer +Input [1]: [max#X] +Keys: [] +Functions [1]: [max(total_revenue#X)] +Aggregate Attributes [1]: [max(total_revenue#X)#X] +Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] + +(57) WholeStageCodegenTransformer (X) +Input [1]: [max(total_revenue)#X] +Arguments: false + +(58) VeloxColumnarToRowExec +Input [1]: [max(total_revenue)#X] + +(59) Scan parquet +Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] +ReadSchema: struct + +(60) Filter +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) + +(61) Project +Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(62) HashAggregate +Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_suppkey#X, sum#X, isEmpty#X] + +(63) Exchange +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(64) HashAggregate +Input [3]: [l_suppkey#X, sum#X, isEmpty#X] +Keys [1]: [l_suppkey#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] + +(65) HashAggregate +Input [1]: [total_revenue#X] +Keys: [] +Functions [1]: [partial_max(total_revenue#X)] +Aggregate Attributes [1]: [max#X] +Results [1]: [max#X] + +(66) HashAggregate +Input [1]: [max#X] +Keys: [] +Functions [1]: [max(total_revenue#X)] +Aggregate Attributes [1]: [max(total_revenue#X)#X] +Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] + +(67) AdaptiveSparkPlan +Output [1]: [max(total_revenue)#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt new file mode 100644 index 000000000000..498c1e83e15b --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt @@ -0,0 +1,352 @@ +== Physical Plan == +AdaptiveSparkPlan (64) ++- == Final Plan == + VeloxColumnarToRowExec (42) + +- ^ SortExecTransformer (40) + +- ^ InputIteratorTransformer (39) + +- ^ InputAdapter (38) + +- ^ ShuffleQueryStage (37), Statistics(X) + +- ColumnarExchange (36) + +- ^ RegularHashAggregateExecTransformer (34) + +- ^ InputIteratorTransformer (33) + +- ^ InputAdapter (32) + +- ^ ShuffleQueryStage (31), Statistics(X) + +- ColumnarExchange (30) + +- ^ ProjectExecTransformer (28) + +- ^ FlushableHashAggregateExecTransformer (27) + +- ^ RegularHashAggregateExecTransformer (26) + +- ^ InputIteratorTransformer (25) + +- ^ InputAdapter (24) + +- ^ ShuffleQueryStage (23), Statistics(X) + +- ColumnarExchange (22) + +- ^ ProjectExecTransformer (20) + +- ^ FlushableHashAggregateExecTransformer (19) + +- ^ ProjectExecTransformer (18) + +- ^ ShuffledHashJoinExecTransformer Inner (17) + :- ^ InputIteratorTransformer (8) + : +- ^ InputAdapter (7) + : +- ^ ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (16) + +- ^ InputAdapter (15) + +- ^ ShuffleQueryStage (14), Statistics(X) + +- ColumnarExchange (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) ++- == Initial Plan == + Sort (63) + +- Exchange (62) + +- HashAggregate (61) + +- Exchange (60) + +- HashAggregate (59) + +- HashAggregate (58) + +- Exchange (57) + +- HashAggregate (56) + +- Project (55) + +- ShuffledHashJoin Inner BuildRight (54) + :- Exchange (50) + : +- BroadcastHashJoin LeftAnti BuildRight (49) + : :- Filter (44) + : : +- Scan parquet (43) + : +- BroadcastExchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Exchange (53) + +- Filter (52) + +- Scan parquet (51) + + +(1) Scan parquet +Output [2]: [ps_partkey#X, ps_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: isnotnull(ps_partkey#X) + +(3) ProjectExecTransformer +Output [3]: [hash(ps_partkey#X, 42) AS hash_partition_key#X, ps_partkey#X, ps_suppkey#X] +Input [2]: [ps_partkey#X, ps_suppkey#X] + +(4) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X] +Arguments: false + +(5) ColumnarExchange +Input [3]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, ps_suppkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: X + +(7) InputAdapter +Input [2]: [ps_partkey#X, ps_suppkey#X] + +(8) InputIteratorTransformer +Input [2]: [ps_partkey#X, ps_suppkey#X] + +(9) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) + +(11) ProjectExecTransformer +Output [5]: [hash(p_partkey#X, 42) AS hash_partition_key#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(12) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: false + +(13) ColumnarExchange +Input [5]: [hash_partition_key#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [p_partkey#X, p_brand#X, p_type#X, p_size#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: X + +(15) InputAdapter +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(16) InputIteratorTransformer +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(19) FlushableHashAggregateExecTransformer +Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(20) ProjectExecTransformer +Output [5]: [hash(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 42) AS hash_partition_key#X, p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(21) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: false + +(22) ColumnarExchange +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [p_brand#X, p_type#X, p_size#X, ps_suppkey#X], [plan_id=X], [id=#X] + +(23) ShuffleQueryStage +Output [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: X + +(24) InputAdapter +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(25) InputIteratorTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(26) RegularHashAggregateExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(27) FlushableHashAggregateExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [partial_count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(28) ProjectExecTransformer +Output [5]: [hash(p_brand#X, p_type#X, p_size#X, 42) AS hash_partition_key#X, p_brand#X, p_type#X, p_size#X, count#X] +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(29) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, count#X] +Arguments: false + +(30) ColumnarExchange +Input [5]: [hash_partition_key#X, p_brand#X, p_type#X, p_size#X, count#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [p_brand#X, p_type#X, p_size#X, count#X], [plan_id=X], [id=#X] + +(31) ShuffleQueryStage +Output [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Arguments: X + +(32) InputAdapter +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(33) InputIteratorTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(34) RegularHashAggregateExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] + +(35) WholeStageCodegenTransformer (X) +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: false + +(36) ColumnarExchange +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(37) ShuffleQueryStage +Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: X + +(38) InputAdapter +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] + +(39) InputIteratorTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] + +(40) SortExecTransformer +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 + +(41) WholeStageCodegenTransformer (X) +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: false + +(42) VeloxColumnarToRowExec +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] + +(43) Scan parquet +Output [2]: [ps_partkey#X, ps_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_partkey)] +ReadSchema: struct + +(44) Filter +Input [2]: [ps_partkey#X, ps_suppkey#X] +Condition : isnotnull(ps_partkey#X) + +(45) Scan parquet +Output [2]: [s_suppkey#X, s_comment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_comment)] +ReadSchema: struct + +(46) Filter +Input [2]: [s_suppkey#X, s_comment#X] +Condition : (isnotnull(s_comment#X) AND s_comment#X LIKE %Customer%Complaints%) + +(47) Project +Output [1]: [s_suppkey#X] +Input [2]: [s_suppkey#X, s_comment#X] + +(48) BroadcastExchange +Input [1]: [s_suppkey#X] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),true), [plan_id=X] + +(49) BroadcastHashJoin +Left keys [1]: [ps_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: LeftAnti +Join condition: None + +(50) Exchange +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(51) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] +ReadSchema: struct + +(52) Filter +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) + +(53) Exchange +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(54) ShuffledHashJoin +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(55) Project +Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] + +(56) HashAggregate +Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(57) Exchange +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(58) HashAggregate +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Functions: [] +Aggregate Attributes: [] +Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] + +(59) HashAggregate +Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [partial_count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] + +(60) Exchange +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(61) HashAggregate +Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] +Keys [3]: [p_brand#X, p_type#X, p_size#X] +Functions [1]: [count(distinct ps_suppkey#X)] +Aggregate Attributes [1]: [count(ps_suppkey#X)#X] +Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] + +(62) Exchange +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(63) Sort +Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 + +(64) AdaptiveSparkPlan +Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt new file mode 100644 index 000000000000..f15f42b69cd8 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt @@ -0,0 +1,328 @@ +== Physical Plan == +AdaptiveSparkPlan (57) ++- == Final Plan == + VeloxColumnarToRowExec (37) + +- ^ ProjectExecTransformer (35) + +- ^ RegularHashAggregateExecTransformer (34) + +- ^ RegularHashAggregateExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ ShuffledHashJoinExecTransformer Inner (31) + :- ^ ProjectExecTransformer (18) + : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : :- ^ InputIteratorTransformer (8) + : : +- ^ InputAdapter (7) + : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : +- ColumnarExchange (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (16) + : +- ^ InputAdapter (15) + : +- ^ ShuffleQueryStage (14), Statistics(X) + : +- ColumnarExchange (13) + : +- ^ ProjectExecTransformer (11) + : +- ^ FilterExecTransformer (10) + : +- ^ Scan parquet (9) + +- ^ FilterExecTransformer (30) + +- ^ ProjectExecTransformer (29) + +- ^ RegularHashAggregateExecTransformer (28) + +- ^ InputIteratorTransformer (27) + +- ^ InputAdapter (26) + +- ^ ShuffleQueryStage (25), Statistics(X) + +- ColumnarExchange (24) + +- ^ ProjectExecTransformer (22) + +- ^ FlushableHashAggregateExecTransformer (21) + +- ^ FilterExecTransformer (20) + +- ^ Scan parquet (19) ++- == Initial Plan == + HashAggregate (56) + +- HashAggregate (55) + +- Project (54) + +- ShuffledHashJoin Inner BuildRight (53) + :- Project (46) + : +- ShuffledHashJoin Inner BuildRight (45) + : :- Exchange (40) + : : +- Filter (39) + : : +- Scan parquet (38) + : +- Exchange (44) + : +- Project (43) + : +- Filter (42) + : +- Scan parquet (41) + +- Filter (52) + +- HashAggregate (51) + +- Exchange (50) + +- HashAggregate (49) + +- Filter (48) + +- Scan parquet (47) + + +(1) Scan parquet +Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) + +(3) ProjectExecTransformer +Output [4]: [hash(l_partkey#X, 42) AS hash_partition_key#X, l_partkey#X, l_quantity#X, l_extendedprice#X] +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] + +(4) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: false + +(5) ColumnarExchange +Input [4]: [hash_partition_key#X, l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_partkey#X, l_quantity#X, l_extendedprice#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: X + +(7) InputAdapter +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] + +(8) InputIteratorTransformer +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] + +(9) Scan parquet +Output [3]: [p_partkey#X, p_brand#X, p_container#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [3]: [p_partkey#X, p_brand#X, p_container#X] +Arguments: ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) + +(11) ProjectExecTransformer +Output [2]: [hash(p_partkey#X, 42) AS hash_partition_key#X, p_partkey#X] +Input [3]: [p_partkey#X, p_brand#X, p_container#X] + +(12) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: false + +(13) ColumnarExchange +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [p_partkey#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(15) InputAdapter +Input [1]: [p_partkey#X] + +(16) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] + +(19) Scan parquet +Output [2]: [l_partkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey)] +ReadSchema: struct + +(20) FilterExecTransformer +Input [2]: [l_partkey#X, l_quantity#X] +Arguments: isnotnull(l_partkey#X) + +(21) FlushableHashAggregateExecTransformer +Input [2]: [l_partkey#X, l_quantity#X] +Keys [1]: [l_partkey#X] +Functions [1]: [partial_avg(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, count#X] +Results [3]: [l_partkey#X, sum#X, count#X] + +(22) ProjectExecTransformer +Output [4]: [hash(l_partkey#X, 42) AS hash_partition_key#X, l_partkey#X, sum#X, count#X] +Input [3]: [l_partkey#X, sum#X, count#X] + +(23) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_partkey#X, sum#X, count#X] +Arguments: false + +(24) ColumnarExchange +Input [4]: [hash_partition_key#X, l_partkey#X, sum#X, count#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_partkey#X, sum#X, count#X], [plan_id=X], [id=#X] + +(25) ShuffleQueryStage +Output [3]: [l_partkey#X, sum#X, count#X] +Arguments: X + +(26) InputAdapter +Input [3]: [l_partkey#X, sum#X, count#X] + +(27) InputIteratorTransformer +Input [3]: [l_partkey#X, sum#X, count#X] + +(28) RegularHashAggregateExecTransformer +Input [3]: [l_partkey#X, sum#X, count#X] +Keys [1]: [l_partkey#X] +Functions [1]: [avg(l_quantity#X)] +Aggregate Attributes [1]: [avg(l_quantity#X)#X] +Results [2]: [l_partkey#X, avg(l_quantity#X)#X] + +(29) ProjectExecTransformer +Output [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] +Input [2]: [l_partkey#X, avg(l_quantity#X)#X] + +(30) FilterExecTransformer +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: isnotnull((0.2 * avg(l_quantity))#X) + +(31) ShuffledHashJoinExecTransformer +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) + +(32) ProjectExecTransformer +Output [1]: [l_extendedprice#X] +Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] + +(33) RegularHashAggregateExecTransformer +Input [1]: [l_extendedprice#X] +Keys: [] +Functions [1]: [partial_sum(l_extendedprice#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(34) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum(l_extendedprice#X)] +Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] +Results [1]: [sum(l_extendedprice#X)#X] + +(35) ProjectExecTransformer +Output [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] +Input [1]: [sum(l_extendedprice#X)#X] + +(36) WholeStageCodegenTransformer (X) +Input [1]: [avg_yearly#X] +Arguments: false + +(37) VeloxColumnarToRowExec +Input [1]: [avg_yearly#X] + +(38) Scan parquet +Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] +ReadSchema: struct + +(39) Filter +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) + +(40) Exchange +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(41) Scan parquet +Output [3]: [p_partkey#X, p_brand#X, p_container#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] +ReadSchema: struct + +(42) Filter +Input [3]: [p_partkey#X, p_brand#X, p_container#X] +Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) + +(43) Project +Output [1]: [p_partkey#X] +Input [3]: [p_partkey#X, p_brand#X, p_container#X] + +(44) Exchange +Input [1]: [p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(45) ShuffledHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: None + +(46) Project +Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] + +(47) Scan parquet +Output [2]: [l_partkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey)] +ReadSchema: struct + +(48) Filter +Input [2]: [l_partkey#X, l_quantity#X] +Condition : isnotnull(l_partkey#X) + +(49) HashAggregate +Input [2]: [l_partkey#X, l_quantity#X] +Keys [1]: [l_partkey#X] +Functions [1]: [partial_avg(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, count#X] +Results [3]: [l_partkey#X, sum#X, count#X] + +(50) Exchange +Input [3]: [l_partkey#X, sum#X, count#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(51) HashAggregate +Input [3]: [l_partkey#X, sum#X, count#X] +Keys [1]: [l_partkey#X] +Functions [1]: [avg(l_quantity#X)] +Aggregate Attributes [1]: [avg(l_quantity#X)#X] +Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] + +(52) Filter +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Condition : isnotnull((0.2 * avg(l_quantity))#X) + +(53) ShuffledHashJoin +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) + +(54) Project +Output [1]: [l_extendedprice#X] +Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] + +(55) HashAggregate +Input [1]: [l_extendedprice#X] +Keys: [] +Functions [1]: [partial_sum(l_extendedprice#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(56) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum(l_extendedprice#X)] +Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] +Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] + +(57) AdaptiveSparkPlan +Output [1]: [avg_yearly#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt new file mode 100644 index 000000000000..ad97ed4013f0 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt @@ -0,0 +1,539 @@ +== Physical Plan == +AdaptiveSparkPlan (97) ++- == Final Plan == + VeloxColumnarToRowExec (64) + +- TakeOrderedAndProjectExecTransformer (63) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ ProjectExecTransformer (59) + +- ^ ShuffledHashJoinExecTransformer Inner (58) + :- ^ InputIteratorTransformer (41) + : +- ^ InputAdapter (40) + : +- ^ ShuffleQueryStage (39), Statistics(X) + : +- ColumnarExchange (38) + : +- ^ ProjectExecTransformer (36) + : +- ^ ShuffledHashJoinExecTransformer Inner (35) + : :- ^ InputIteratorTransformer (8) + : : +- ^ InputAdapter (7) + : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : +- ColumnarExchange (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (34) + : +- ^ InputAdapter (33) + : +- ^ ShuffleQueryStage (32), Statistics(X) + : +- ColumnarExchange (31) + : +- ^ ProjectExecTransformer (29) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi (28) + : :- ^ InputIteratorTransformer (16) + : : +- ^ InputAdapter (15) + : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : +- ColumnarExchange (13) + : : +- ^ ProjectExecTransformer (11) + : : +- ^ FilterExecTransformer (10) + : : +- ^ Scan parquet (9) + : +- ^ ProjectExecTransformer (27) + : +- ^ FilterExecTransformer (26) + : +- ^ RegularHashAggregateExecTransformer (25) + : +- ^ InputIteratorTransformer (24) + : +- ^ InputAdapter (23) + : +- ^ ShuffleQueryStage (22), Statistics(X) + : +- ColumnarExchange (21) + : +- ^ ProjectExecTransformer (19) + : +- ^ FlushableHashAggregateExecTransformer (18) + : +- ^ Scan parquet (17) + +- ^ ShuffledHashJoinExecTransformer LeftSemi (57) + :- ^ InputIteratorTransformer (49) + : +- ^ InputAdapter (48) + : +- ^ ShuffleQueryStage (47), Statistics(X) + : +- ColumnarExchange (46) + : +- ^ ProjectExecTransformer (44) + : +- ^ FilterExecTransformer (43) + : +- ^ Scan parquet (42) + +- ^ ProjectExecTransformer (56) + +- ^ FilterExecTransformer (55) + +- ^ RegularHashAggregateExecTransformer (54) + +- ^ InputIteratorTransformer (53) + +- ^ InputAdapter (52) + +- ^ ShuffleQueryStage (51), Statistics(X) + +- ReusedExchange (50) ++- == Initial Plan == + TakeOrderedAndProject (96) + +- HashAggregate (95) + +- HashAggregate (94) + +- Project (93) + +- ShuffledHashJoin Inner BuildRight (92) + :- Exchange (81) + : +- Project (80) + : +- ShuffledHashJoin Inner BuildLeft (79) + : :- Exchange (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Exchange (78) + : +- ShuffledHashJoin LeftSemi BuildRight (77) + : :- Exchange (70) + : : +- Filter (69) + : : +- Scan parquet (68) + : +- Project (76) + : +- Filter (75) + : +- HashAggregate (74) + : +- Exchange (73) + : +- HashAggregate (72) + : +- Scan parquet (71) + +- ShuffledHashJoin LeftSemi BuildRight (91) + :- Exchange (84) + : +- Filter (83) + : +- Scan parquet (82) + +- Project (90) + +- Filter (89) + +- HashAggregate (88) + +- Exchange (87) + +- HashAggregate (86) + +- Scan parquet (85) + + +(1) Scan parquet +Output [2]: [c_custkey#X, c_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [c_custkey#X, c_name#X] +Arguments: isnotnull(c_custkey#X) + +(3) ProjectExecTransformer +Output [3]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_name#X] +Input [2]: [c_custkey#X, c_name#X] + +(4) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_custkey#X, c_name#X] +Arguments: false + +(5) ColumnarExchange +Input [3]: [hash_partition_key#X, c_custkey#X, c_name#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_name#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [2]: [c_custkey#X, c_name#X] +Arguments: X + +(7) InputAdapter +Input [2]: [c_custkey#X, c_name#X] + +(8) InputIteratorTransformer +Input [2]: [c_custkey#X, c_name#X] + +(9) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) + +(11) ProjectExecTransformer +Output [5]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(12) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: false + +(13) ColumnarExchange +Input [5]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: X + +(15) InputAdapter +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(16) InputIteratorTransformer +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(17) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(18) FlushableHashAggregateExecTransformer +Input [2]: [l_orderkey#X, l_quantity#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(19) ProjectExecTransformer +Output [4]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, sum#X, isEmpty#X] +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(20) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_orderkey#X, sum#X, isEmpty#X] +Arguments: false + +(21) ColumnarExchange +Input [4]: [hash_partition_key#X, l_orderkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(22) ShuffleQueryStage +Output [3]: [l_orderkey#X, sum#X, isEmpty#X] +Arguments: X + +(23) InputAdapter +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(24) InputIteratorTransformer +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(25) RegularHashAggregateExecTransformer +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] + +(26) FilterExecTransformer +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] +Arguments: (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) + +(27) ProjectExecTransformer +Output [1]: [l_orderkey#X] +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] + +(28) ShuffledHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(29) ProjectExecTransformer +Output [5]: [hash(o_custkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(30) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: false + +(31) ColumnarExchange +Input [5]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X], [plan_id=X], [id=#X] + +(32) ShuffleQueryStage +Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: X + +(33) InputAdapter +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(34) InputIteratorTransformer +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(35) ShuffledHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(36) ProjectExecTransformer +Output [6]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(37) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: false + +(38) ColumnarExchange +Input [6]: [hash_partition_key#X, c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X], [plan_id=X], [id=#X] + +(39) ShuffleQueryStage +Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: X + +(40) InputAdapter +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] + +(41) InputIteratorTransformer +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] + +(42) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey)] +ReadSchema: struct + +(43) FilterExecTransformer +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: isnotnull(l_orderkey#X) + +(44) ProjectExecTransformer +Output [3]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_quantity#X] +Input [2]: [l_orderkey#X, l_quantity#X] + +(45) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, l_orderkey#X, l_quantity#X] +Arguments: false + +(46) ColumnarExchange +Input [3]: [hash_partition_key#X, l_orderkey#X, l_quantity#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_quantity#X], [plan_id=X], [id=#X] + +(47) ShuffleQueryStage +Output [2]: [l_orderkey#X, l_quantity#X] +Arguments: X + +(48) InputAdapter +Input [2]: [l_orderkey#X, l_quantity#X] + +(49) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_quantity#X] + +(50) ReusedExchange [Reuses operator id: 21] +Output [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(51) ShuffleQueryStage +Output [3]: [l_orderkey#X, sum#X, isEmpty#X] +Arguments: X + +(52) InputAdapter +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(53) InputIteratorTransformer +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(54) RegularHashAggregateExecTransformer +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] + +(55) FilterExecTransformer +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] +Arguments: (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) + +(56) ProjectExecTransformer +Output [1]: [l_orderkey#X] +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] + +(57) ShuffledHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(58) ShuffledHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(59) ProjectExecTransformer +Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] + +(60) RegularHashAggregateExecTransformer +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] + +(61) RegularHashAggregateExecTransformer +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] + +(62) WholeStageCodegenTransformer (X) +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: false + +(63) TakeOrderedAndProjectExecTransformer +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 + +(64) VeloxColumnarToRowExec +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] + +(65) Scan parquet +Output [2]: [c_custkey#X, c_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey)] +ReadSchema: struct + +(66) Filter +Input [2]: [c_custkey#X, c_name#X] +Condition : isnotnull(c_custkey#X) + +(67) Exchange +Input [2]: [c_custkey#X, c_name#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(68) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(69) Filter +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) + +(70) Exchange +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(71) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(72) HashAggregate +Input [2]: [l_orderkey#X, l_quantity#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(73) Exchange +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(74) HashAggregate +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] + +(75) Filter +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] +Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) + +(76) Project +Output [1]: [l_orderkey#X] +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] + +(77) ShuffledHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(78) Exchange +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(79) ShuffledHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(80) Project +Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] + +(81) Exchange +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(82) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey)] +ReadSchema: struct + +(83) Filter +Input [2]: [l_orderkey#X, l_quantity#X] +Condition : isnotnull(l_orderkey#X) + +(84) Exchange +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(85) Scan parquet +Output [2]: [l_orderkey#X, l_quantity#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(86) HashAggregate +Input [2]: [l_orderkey#X, l_quantity#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [l_orderkey#X, sum#X, isEmpty#X] + +(87) Exchange +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(88) HashAggregate +Input [3]: [l_orderkey#X, sum#X, isEmpty#X] +Keys [1]: [l_orderkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] + +(89) Filter +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] +Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) + +(90) Project +Output [1]: [l_orderkey#X] +Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] + +(91) ShuffledHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(92) ShuffledHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(93) Project +Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] + +(94) HashAggregate +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] + +(95) HashAggregate +Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] +Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] + +(96) TakeOrderedAndProject +Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] + +(97) AdaptiveSparkPlan +Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt new file mode 100644 index 000000000000..4b5c20d3a1b1 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt @@ -0,0 +1,194 @@ +== Physical Plan == +AdaptiveSparkPlan (34) ++- == Final Plan == + VeloxColumnarToRowExec (22) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ RegularHashAggregateExecTransformer (19) + +- ^ ProjectExecTransformer (18) + +- ^ ShuffledHashJoinExecTransformer Inner (17) + :- ^ InputIteratorTransformer (8) + : +- ^ InputAdapter (7) + : +- ^ ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (16) + +- ^ InputAdapter (15) + +- ^ ShuffleQueryStage (14), Statistics(X) + +- ColumnarExchange (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) ++- == Initial Plan == + HashAggregate (33) + +- HashAggregate (32) + +- Project (31) + +- ShuffledHashJoin Inner BuildRight (30) + :- Exchange (26) + : +- Project (25) + : +- Filter (24) + : +- Scan parquet (23) + +- Exchange (29) + +- Filter (28) + +- Scan parquet (27) + + +(1) Scan parquet +Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] +ReadSchema: struct + +(2) FilterExecTransformer +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Arguments: ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) + +(3) ProjectExecTransformer +Output [5]: [hash(l_partkey#X, 42) AS hash_partition_key#X, l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] + +(4) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(5) ColumnarExchange +Input [5]: [hash_partition_key#X, l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(7) InputAdapter +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(8) InputIteratorTransformer +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(9) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] +ReadSchema: struct + +(10) FilterExecTransformer +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) + +(11) ProjectExecTransformer +Output [5]: [hash(p_partkey#X, 42) AS hash_partition_key#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(12) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: false + +(13) ColumnarExchange +Input [5]: [hash_partition_key#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [p_partkey#X, p_brand#X, p_size#X, p_container#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: X + +(15) InputAdapter +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(16) InputIteratorTransformer +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) + +(18) ProjectExecTransformer +Output [3]: [l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(19) RegularHashAggregateExecTransformer +Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] +Keys: [] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(20) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(21) WholeStageCodegenTransformer (X) +Input [1]: [revenue#X] +Arguments: false + +(22) VeloxColumnarToRowExec +Input [1]: [revenue#X] + +(23) Scan parquet +Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] +ReadSchema: struct + +(24) Filter +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] +Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) + +(25) Project +Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] + +(26) Exchange +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(27) Scan parquet +Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] +ReadSchema: struct + +(28) Filter +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) + +(29) Exchange +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(30) ShuffledHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: Inner +Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) + +(31) Project +Output [2]: [l_extendedprice#X, l_discount#X] +Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] + +(32) HashAggregate +Input [2]: [l_extendedprice#X, l_discount#X] +Keys: [] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(33) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(34) AdaptiveSparkPlan +Output [1]: [revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt new file mode 100644 index 000000000000..17f48f8ffa72 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt @@ -0,0 +1,652 @@ +== Physical Plan == +AdaptiveSparkPlan (123) ++- == Final Plan == + VeloxColumnarToRowExec (83) + +- AQEShuffleRead (82) + +- ShuffleQueryStage (81), Statistics(X) + +- ColumnarExchange (80) + +- ^ ProjectExecTransformer (78) + +- ^ ShuffledHashJoinExecTransformer Inner (77) + :- ^ InputIteratorTransformer (68) + : +- ^ InputAdapter (67) + : +- ^ ShuffleQueryStage (66), Statistics(X) + : +- ColumnarExchange (65) + : +- ^ ProjectExecTransformer (63) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi (62) + : :- ^ InputIteratorTransformer (8) + : : +- ^ InputAdapter (7) + : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : +- ColumnarExchange (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (61) + : +- ^ InputAdapter (60) + : +- ^ ShuffleQueryStage (59), Statistics(X) + : +- ColumnarExchange (58) + : +- ^ ProjectExecTransformer (56) + : +- ^ ShuffledHashJoinExecTransformer Inner (55) + : :- ^ InputIteratorTransformer (31) + : : +- ^ InputAdapter (30) + : : +- ^ ShuffleQueryStage (29), Statistics(X) + : : +- ColumnarExchange (28) + : : +- ^ ProjectExecTransformer (26) + : : +- ^ ShuffledHashJoinExecTransformer LeftSemi (25) + : : :- ^ InputIteratorTransformer (16) + : : : +- ^ InputAdapter (15) + : : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : : +- ColumnarExchange (13) + : : : +- ^ ProjectExecTransformer (11) + : : : +- ^ FilterExecTransformer (10) + : : : +- ^ Scan parquet (9) + : : +- ^ InputIteratorTransformer (24) + : : +- ^ InputAdapter (23) + : : +- ^ ShuffleQueryStage (22), Statistics(X) + : : +- ColumnarExchange (21) + : : +- ^ ProjectExecTransformer (19) + : : +- ^ FilterExecTransformer (18) + : : +- ^ Scan parquet (17) + : +- ^ InputIteratorTransformer (54) + : +- ^ InputAdapter (53) + : +- ^ ShuffleQueryStage (52), Statistics(X) + : +- ColumnarExchange (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ ProjectExecTransformer (47) + : +- ^ RegularHashAggregateExecTransformer (46) + : +- ^ RegularHashAggregateExecTransformer (45) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi (44) + : :- ^ InputIteratorTransformer (39) + : : +- ^ InputAdapter (38) + : : +- ^ ShuffleQueryStage (37), Statistics(X) + : : +- ColumnarExchange (36) + : : +- ^ ProjectExecTransformer (34) + : : +- ^ FilterExecTransformer (33) + : : +- ^ Scan parquet (32) + : +- ^ InputIteratorTransformer (43) + : +- ^ InputAdapter (42) + : +- ^ ShuffleQueryStage (41), Statistics(X) + : +- ReusedExchange (40) + +- ^ InputIteratorTransformer (76) + +- ^ InputAdapter (75) + +- ^ ShuffleQueryStage (74), Statistics(X) + +- ColumnarExchange (73) + +- ^ ProjectExecTransformer (71) + +- ^ FilterExecTransformer (70) + +- ^ Scan parquet (69) ++- == Initial Plan == + Sort (122) + +- Exchange (121) + +- Project (120) + +- ShuffledHashJoin Inner BuildRight (119) + :- Exchange (114) + : +- Project (113) + : +- ShuffledHashJoin LeftSemi BuildRight (112) + : :- Exchange (86) + : : +- Filter (85) + : : +- Scan parquet (84) + : +- Exchange (111) + : +- Project (110) + : +- ShuffledHashJoin Inner BuildLeft (109) + : :- Exchange (95) + : : +- ShuffledHashJoin LeftSemi BuildRight (94) + : : :- Exchange (89) + : : : +- Filter (88) + : : : +- Scan parquet (87) + : : +- Exchange (93) + : : +- Project (92) + : : +- Filter (91) + : : +- Scan parquet (90) + : +- Exchange (108) + : +- Filter (107) + : +- HashAggregate (106) + : +- HashAggregate (105) + : +- ShuffledHashJoin LeftSemi BuildRight (104) + : :- Exchange (99) + : : +- Project (98) + : : +- Filter (97) + : : +- Scan parquet (96) + : +- Exchange (103) + : +- Project (102) + : +- Filter (101) + : +- Scan parquet (100) + +- Exchange (118) + +- Project (117) + +- Filter (116) + +- Scan parquet (115) + + +(1) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: isnotnull(s_nationkey#X) + +(3) ProjectExecTransformer +Output [5]: [hash(s_suppkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] + +(4) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: false + +(5) ColumnarExchange +Input [5]: [hash_partition_key#X, s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: X + +(7) InputAdapter +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] + +(8) InputIteratorTransformer +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] + +(9) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) + +(11) ProjectExecTransformer +Output [4]: [hash(ps_partkey#X, 42) AS hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(12) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: false + +(13) ColumnarExchange +Input [4]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, ps_suppkey#X, ps_availqty#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: X + +(15) InputAdapter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(16) InputIteratorTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(17) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] +ReadSchema: struct + +(18) FilterExecTransformer +Input [2]: [p_partkey#X, p_name#X] +Arguments: (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) + +(19) ProjectExecTransformer +Output [2]: [hash(p_partkey#X, 42) AS hash_partition_key#X, p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(20) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: false + +(21) ColumnarExchange +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [p_partkey#X], [plan_id=X], [id=#X] + +(22) ShuffleQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(23) InputAdapter +Input [1]: [p_partkey#X] + +(24) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(25) ShuffledHashJoinExecTransformer +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(26) ProjectExecTransformer +Output [4]: [hash(ps_partkey#X, ps_suppkey#X, 42) AS hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(27) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: false + +(28) ColumnarExchange +Input [4]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, ps_suppkey#X, ps_availqty#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: X + +(30) InputAdapter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(31) InputIteratorTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] + +(32) Scan parquet +Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(33) FilterExecTransformer +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Arguments: ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) + +(34) ProjectExecTransformer +Output [4]: [hash(l_partkey#X, 42) AS hash_partition_key#X, l_partkey#X, l_suppkey#X, l_quantity#X] +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] + +(35) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: false + +(36) ColumnarExchange +Input [4]: [hash_partition_key#X, l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_partkey#X, l_suppkey#X, l_quantity#X], [plan_id=X], [id=#X] + +(37) ShuffleQueryStage +Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: X + +(38) InputAdapter +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] + +(39) InputIteratorTransformer +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] + +(40) ReusedExchange [Reuses operator id: 21] +Output [1]: [p_partkey#X] + +(41) ShuffleQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(42) InputAdapter +Input [1]: [p_partkey#X] + +(43) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(44) ShuffledHashJoinExecTransformer +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(45) RegularHashAggregateExecTransformer +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] + +(46) RegularHashAggregateExecTransformer +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] + +(47) ProjectExecTransformer +Output [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] + +(48) FilterExecTransformer +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: isnotnull((0.5 * sum(l_quantity))#X) + +(49) ProjectExecTransformer +Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(50) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: false + +(51) ColumnarExchange +Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [id=#X] + +(52) ShuffleQueryStage +Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: X + +(53) InputAdapter +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(54) InputIteratorTransformer +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(55) ShuffledHashJoinExecTransformer +Left keys [2]: [ps_partkey#X, ps_suppkey#X] +Right keys [2]: [l_partkey#X, l_suppkey#X] +Join type: Inner +Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) + +(56) ProjectExecTransformer +Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(57) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, ps_suppkey#X] +Arguments: false + +(58) ColumnarExchange +Input [2]: [hash_partition_key#X, ps_suppkey#X] +Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [id=#X] + +(59) ShuffleQueryStage +Output [1]: [ps_suppkey#X] +Arguments: X + +(60) InputAdapter +Input [1]: [ps_suppkey#X] + +(61) InputIteratorTransformer +Input [1]: [ps_suppkey#X] + +(62) ShuffledHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [ps_suppkey#X] +Join type: LeftSemi +Join condition: None + +(63) ProjectExecTransformer +Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] + +(64) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: false + +(65) ColumnarExchange +Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [id=#X] + +(66) ShuffleQueryStage +Output [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: X + +(67) InputAdapter +Input [3]: [s_name#X, s_address#X, s_nationkey#X] + +(68) InputIteratorTransformer +Input [3]: [s_name#X, s_address#X, s_nationkey#X] + +(69) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(70) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) + +(71) ProjectExecTransformer +Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(72) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, n_nationkey#X] +Arguments: false + +(73) ColumnarExchange +Input [2]: [hash_partition_key#X, n_nationkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [id=#X] + +(74) ShuffleQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(75) InputAdapter +Input [1]: [n_nationkey#X] + +(76) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(77) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(78) ProjectExecTransformer +Output [2]: [s_name#X, s_address#X] +Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] + +(79) WholeStageCodegenTransformer (X) +Input [2]: [s_name#X, s_address#X] +Arguments: false + +(80) ColumnarExchange +Input [2]: [s_name#X, s_address#X] +Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(81) ShuffleQueryStage +Output [2]: [s_name#X, s_address#X] +Arguments: X + +(82) AQEShuffleRead +Input [2]: [s_name#X, s_address#X] +Arguments: local + +(83) VeloxColumnarToRowExec +Input [2]: [s_name#X, s_address#X] + +(84) Scan parquet +Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_nationkey)] +ReadSchema: struct + +(85) Filter +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Condition : isnotnull(s_nationkey#X) + +(86) Exchange +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(87) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] +ReadSchema: struct + +(88) Filter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) + +(89) Exchange +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(90) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] +ReadSchema: struct + +(91) Filter +Input [2]: [p_partkey#X, p_name#X] +Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) + +(92) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(93) Exchange +Input [1]: [p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(94) ShuffledHashJoin +Left keys [1]: [ps_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(95) Exchange +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(96) Scan parquet +Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(97) Filter +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] +Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) + +(98) Project +Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] + +(99) Exchange +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(100) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] +ReadSchema: struct + +(101) Filter +Input [2]: [p_partkey#X, p_name#X] +Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) + +(102) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(103) Exchange +Input [1]: [p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(104) ShuffledHashJoin +Left keys [1]: [l_partkey#X] +Right keys [1]: [p_partkey#X] +Join type: LeftSemi +Join condition: None + +(105) HashAggregate +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [partial_sum(l_quantity#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] + +(106) HashAggregate +Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] +Keys [2]: [l_partkey#X, l_suppkey#X] +Functions [1]: [sum(l_quantity#X)] +Aggregate Attributes [1]: [sum(l_quantity#X)#X] +Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(107) Filter +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Condition : isnotnull((0.5 * sum(l_quantity))#X) + +(108) Exchange +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(109) ShuffledHashJoin +Left keys [2]: [ps_partkey#X, ps_suppkey#X] +Right keys [2]: [l_partkey#X, l_suppkey#X] +Join type: Inner +Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) + +(110) Project +Output [1]: [ps_suppkey#X] +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] + +(111) Exchange +Input [1]: [ps_suppkey#X] +Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(112) ShuffledHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [ps_suppkey#X] +Join type: LeftSemi +Join condition: None + +(113) Project +Output [3]: [s_name#X, s_address#X, s_nationkey#X] +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] + +(114) Exchange +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(115) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(116) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) + +(117) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(118) Exchange +Input [1]: [n_nationkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(119) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(120) Project +Output [2]: [s_name#X, s_address#X] +Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] + +(121) Exchange +Input [2]: [s_name#X, s_address#X] +Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(122) Sort +Input [2]: [s_name#X, s_address#X] +Arguments: [s_name#X ASC NULLS FIRST], true, 0 + +(123) AdaptiveSparkPlan +Output [2]: [s_name#X, s_address#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt new file mode 100644 index 000000000000..7b5d71ec6d26 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt @@ -0,0 +1,628 @@ +== Physical Plan == +AdaptiveSparkPlan (118) ++- == Final Plan == + VeloxColumnarToRowExec (81) + +- ^ RegularHashAggregateExecTransformer (79) + +- ^ InputIteratorTransformer (78) + +- ^ InputAdapter (77) + +- ^ ShuffleQueryStage (76), Statistics(X) + +- ColumnarExchange (75) + +- ^ ProjectExecTransformer (73) + +- ^ FlushableHashAggregateExecTransformer (72) + +- ^ ProjectExecTransformer (71) + +- ^ ShuffledHashJoinExecTransformer Inner (70) + :- ^ InputIteratorTransformer (61) + : +- ^ InputAdapter (60) + : +- ^ ShuffleQueryStage (59), Statistics(X) + : +- ColumnarExchange (58) + : +- ^ ProjectExecTransformer (56) + : +- ^ ShuffledHashJoinExecTransformer Inner (55) + : :- ^ InputIteratorTransformer (46) + : : +- ^ InputAdapter (45) + : : +- ^ ShuffleQueryStage (44), Statistics(X) + : : +- ColumnarExchange (43) + : : +- ^ ProjectExecTransformer (41) + : : +- ^ ShuffledHashJoinExecTransformer Inner (40) + : : :- ^ InputIteratorTransformer (8) + : : : +- ^ InputAdapter (7) + : : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : : +- ColumnarExchange (5) + : : : +- ^ ProjectExecTransformer (3) + : : : +- ^ FilterExecTransformer (2) + : : : +- ^ Scan parquet (1) + : : +- ^ InputIteratorTransformer (39) + : : +- ^ InputAdapter (38) + : : +- ^ ShuffleQueryStage (37), Statistics(X) + : : +- ColumnarExchange (36) + : : +- ^ ProjectExecTransformer (34) + : : +- ^ ShuffledHashJoinExecTransformer LeftAnti (33) + : : :- ^ ShuffledHashJoinExecTransformer LeftSemi (24) + : : : :- ^ InputIteratorTransformer (16) + : : : : +- ^ InputAdapter (15) + : : : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : : : +- ColumnarExchange (13) + : : : : +- ^ ProjectExecTransformer (11) + : : : : +- ^ FilterExecTransformer (10) + : : : : +- ^ Scan parquet (9) + : : : +- ^ InputIteratorTransformer (23) + : : : +- ^ InputAdapter (22) + : : : +- ^ ShuffleQueryStage (21), Statistics(X) + : : : +- ColumnarExchange (20) + : : : +- ^ ProjectExecTransformer (18) + : : : +- ^ Scan parquet (17) + : : +- ^ InputIteratorTransformer (32) + : : +- ^ InputAdapter (31) + : : +- ^ ShuffleQueryStage (30), Statistics(X) + : : +- ColumnarExchange (29) + : : +- ^ ProjectExecTransformer (27) + : : +- ^ FilterExecTransformer (26) + : : +- ^ Scan parquet (25) + : +- ^ InputIteratorTransformer (54) + : +- ^ InputAdapter (53) + : +- ^ ShuffleQueryStage (52), Statistics(X) + : +- ColumnarExchange (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ Scan parquet (47) + +- ^ InputIteratorTransformer (69) + +- ^ InputAdapter (68) + +- ^ ShuffleQueryStage (67), Statistics(X) + +- ColumnarExchange (66) + +- ^ ProjectExecTransformer (64) + +- ^ FilterExecTransformer (63) + +- ^ Scan parquet (62) ++- == Initial Plan == + TakeOrderedAndProject (117) + +- HashAggregate (116) + +- Exchange (115) + +- HashAggregate (114) + +- Project (113) + +- ShuffledHashJoin Inner BuildRight (112) + :- Exchange (107) + : +- Project (106) + : +- ShuffledHashJoin Inner BuildRight (105) + : :- Exchange (100) + : : +- Project (99) + : : +- ShuffledHashJoin Inner BuildLeft (98) + : : :- Exchange (84) + : : : +- Filter (83) + : : : +- Scan parquet (82) + : : +- Exchange (97) + : : +- ShuffledHashJoin LeftAnti BuildRight (96) + : : :- ShuffledHashJoin LeftSemi BuildRight (91) + : : : :- Exchange (88) + : : : : +- Project (87) + : : : : +- Filter (86) + : : : : +- Scan parquet (85) + : : : +- Exchange (90) + : : : +- Scan parquet (89) + : : +- Exchange (95) + : : +- Project (94) + : : +- Filter (93) + : : +- Scan parquet (92) + : +- Exchange (104) + : +- Project (103) + : +- Filter (102) + : +- Scan parquet (101) + +- Exchange (111) + +- Project (110) + +- Filter (109) + +- Scan parquet (108) + + +(1) Scan parquet +Output [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(3) ProjectExecTransformer +Output [4]: [hash(s_suppkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_name#X, s_nationkey#X] +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] + +(4) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: false + +(5) ColumnarExchange +Input [4]: [hash_partition_key#X, s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_name#X, s_nationkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: X + +(7) InputAdapter +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] + +(8) InputIteratorTransformer +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] + +(9) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Arguments: ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(11) ProjectExecTransformer +Output [3]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(12) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: false + +(13) ColumnarExchange +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_suppkey#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [2]: [l_orderkey#X, l_suppkey#X] +Arguments: X + +(15) InputAdapter +Input [2]: [l_orderkey#X, l_suppkey#X] + +(16) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_suppkey#X] + +(17) Scan parquet +Output [2]: [l_orderkey#X, l_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(18) ProjectExecTransformer +Output [3]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Input [2]: [l_orderkey#X, l_suppkey#X] + +(19) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: false + +(20) ColumnarExchange +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_suppkey#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [2]: [l_orderkey#X, l_suppkey#X] +Arguments: X + +(22) InputAdapter +Input [2]: [l_orderkey#X, l_suppkey#X] + +(23) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_suppkey#X] + +(24) ShuffledHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(25) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] +ReadSchema: struct + +(26) FilterExecTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Arguments: ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) + +(27) ProjectExecTransformer +Output [3]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(28) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: false + +(29) ColumnarExchange +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_suppkey#X], [plan_id=X], [id=#X] + +(30) ShuffleQueryStage +Output [2]: [l_orderkey#X, l_suppkey#X] +Arguments: X + +(31) InputAdapter +Input [2]: [l_orderkey#X, l_suppkey#X] + +(32) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_suppkey#X] + +(33) ShuffledHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftAnti +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(34) ProjectExecTransformer +Output [3]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Input [2]: [l_orderkey#X, l_suppkey#X] + +(35) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: false + +(36) ColumnarExchange +Input [3]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_suppkey#X], [plan_id=X], [id=#X] + +(37) ShuffleQueryStage +Output [2]: [l_orderkey#X, l_suppkey#X] +Arguments: X + +(38) InputAdapter +Input [2]: [l_orderkey#X, l_suppkey#X] + +(39) InputIteratorTransformer +Input [2]: [l_orderkey#X, l_suppkey#X] + +(40) ShuffledHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(41) ProjectExecTransformer +Output [4]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, s_name#X, s_nationkey#X, l_orderkey#X] +Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] + +(42) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: false + +(43) ColumnarExchange +Input [4]: [hash_partition_key#X, s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_nationkey#X, l_orderkey#X], [plan_id=X], [id=#X] + +(44) ShuffleQueryStage +Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: X + +(45) InputAdapter +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] + +(46) InputIteratorTransformer +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] + +(47) Scan parquet +Output [2]: [o_orderkey#X, o_orderstatus#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] +ReadSchema: struct + +(48) FilterExecTransformer +Input [2]: [o_orderkey#X, o_orderstatus#X] +Arguments: ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) + +(49) ProjectExecTransformer +Output [2]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X] +Input [2]: [o_orderkey#X, o_orderstatus#X] + +(50) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, o_orderkey#X] +Arguments: false + +(51) ColumnarExchange +Input [2]: [hash_partition_key#X, o_orderkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X], [plan_id=X], [id=#X] + +(52) ShuffleQueryStage +Output [1]: [o_orderkey#X] +Arguments: X + +(53) InputAdapter +Input [1]: [o_orderkey#X] + +(54) InputIteratorTransformer +Input [1]: [o_orderkey#X] + +(55) ShuffledHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(56) ProjectExecTransformer +Output [3]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_nationkey#X] +Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] + +(57) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_name#X, s_nationkey#X] +Arguments: false + +(58) ColumnarExchange +Input [3]: [hash_partition_key#X, s_name#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_nationkey#X], [plan_id=X], [id=#X] + +(59) ShuffleQueryStage +Output [2]: [s_name#X, s_nationkey#X] +Arguments: X + +(60) InputAdapter +Input [2]: [s_name#X, s_nationkey#X] + +(61) InputIteratorTransformer +Input [2]: [s_name#X, s_nationkey#X] + +(62) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(63) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) + +(64) ProjectExecTransformer +Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(65) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, n_nationkey#X] +Arguments: false + +(66) ColumnarExchange +Input [2]: [hash_partition_key#X, n_nationkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [id=#X] + +(67) ShuffleQueryStage +Output [1]: [n_nationkey#X] +Arguments: X + +(68) InputAdapter +Input [1]: [n_nationkey#X] + +(69) InputIteratorTransformer +Input [1]: [n_nationkey#X] + +(70) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(71) ProjectExecTransformer +Output [1]: [s_name#X] +Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] + +(72) FlushableHashAggregateExecTransformer +Input [1]: [s_name#X] +Keys [1]: [s_name#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [s_name#X, count#X] + +(73) ProjectExecTransformer +Output [3]: [hash(s_name#X, 42) AS hash_partition_key#X, s_name#X, count#X] +Input [2]: [s_name#X, count#X] + +(74) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_name#X, count#X] +Arguments: false + +(75) ColumnarExchange +Input [3]: [hash_partition_key#X, s_name#X, count#X] +Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [s_name#X, count#X], [plan_id=X], [id=#X] + +(76) ShuffleQueryStage +Output [2]: [s_name#X, count#X] +Arguments: X + +(77) InputAdapter +Input [2]: [s_name#X, count#X] + +(78) InputIteratorTransformer +Input [2]: [s_name#X, count#X] + +(79) RegularHashAggregateExecTransformer +Input [2]: [s_name#X, count#X] +Keys [1]: [s_name#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [s_name#X, count(1)#X AS numwait#X] + +(80) WholeStageCodegenTransformer (X) +Input [2]: [s_name#X, numwait#X] +Arguments: false + +(81) VeloxColumnarToRowExec +Input [2]: [s_name#X, numwait#X] + +(82) Scan parquet +Output [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(83) Filter +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(84) Exchange +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(85) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(86) Filter +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(87) Project +Output [2]: [l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(88) Exchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(89) Scan parquet +Output [2]: [l_orderkey#X, l_suppkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(90) Exchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(91) ShuffledHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(92) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] +ReadSchema: struct + +(93) Filter +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] +Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) + +(94) Project +Output [2]: [l_orderkey#X, l_suppkey#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] + +(95) Exchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(96) ShuffledHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftAnti +Join condition: NOT (l_suppkey#X = l_suppkey#X) + +(97) Exchange +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(98) ShuffledHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(99) Project +Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] + +(100) Exchange +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(101) Scan parquet +Output [2]: [o_orderkey#X, o_orderstatus#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] +ReadSchema: struct + +(102) Filter +Input [2]: [o_orderkey#X, o_orderstatus#X] +Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) + +(103) Project +Output [1]: [o_orderkey#X] +Input [2]: [o_orderkey#X, o_orderstatus#X] + +(104) Exchange +Input [1]: [o_orderkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(105) ShuffledHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(106) Project +Output [2]: [s_name#X, s_nationkey#X] +Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] + +(107) Exchange +Input [2]: [s_name#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(108) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] +ReadSchema: struct + +(109) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) + +(110) Project +Output [1]: [n_nationkey#X] +Input [2]: [n_nationkey#X, n_name#X] + +(111) Exchange +Input [1]: [n_nationkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(112) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(113) Project +Output [1]: [s_name#X] +Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] + +(114) HashAggregate +Input [1]: [s_name#X] +Keys [1]: [s_name#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [s_name#X, count#X] + +(115) Exchange +Input [2]: [s_name#X, count#X] +Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(116) HashAggregate +Input [2]: [s_name#X, count#X] +Keys [1]: [s_name#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [s_name#X, count(1)#X AS numwait#X] + +(117) TakeOrderedAndProject +Input [2]: [s_name#X, numwait#X] +Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] + +(118) AdaptiveSparkPlan +Output [2]: [s_name#X, numwait#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt new file mode 100644 index 000000000000..86f293b4ff07 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt @@ -0,0 +1,382 @@ +== Physical Plan == +AdaptiveSparkPlan (46) ++- == Final Plan == + VeloxColumnarToRowExec (33) + +- ^ SortExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ^ InputAdapter (29) + +- ^ ShuffleQueryStage (28), Statistics(X) + +- ColumnarExchange (27) + +- ^ RegularHashAggregateExecTransformer (25) + +- ^ InputIteratorTransformer (24) + +- ^ InputAdapter (23) + +- ^ ShuffleQueryStage (22), Statistics(X) + +- ColumnarExchange (21) + +- ^ ProjectExecTransformer (19) + +- ^ FlushableHashAggregateExecTransformer (18) + +- ^ ProjectExecTransformer (17) + +- ^ ShuffledHashJoinExecTransformer LeftAnti (16) + :- ^ InputIteratorTransformer (8) + : +- ^ InputAdapter (7) + : +- ^ ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (15) + +- ^ InputAdapter (14) + +- ^ ShuffleQueryStage (13), Statistics(X) + +- ColumnarExchange (12) + +- ^ ProjectExecTransformer (10) + +- ^ Scan parquet (9) ++- == Initial Plan == + Sort (45) + +- Exchange (44) + +- HashAggregate (43) + +- Exchange (42) + +- HashAggregate (41) + +- Project (40) + +- ShuffledHashJoin LeftAnti BuildRight (39) + :- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Exchange (38) + +- Scan parquet (37) + + +(1) Scan parquet +Output [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) AND (cast(c_acctbal#X as decimal(16,6)) > Subquery subquery#X, [id=#X])) + +(3) ProjectExecTransformer +Output [4]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_phone#X, c_acctbal#X] +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] + +(4) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: false + +(5) ColumnarExchange +Input [4]: [hash_partition_key#X, c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_phone#X, c_acctbal#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: X + +(7) InputAdapter +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] + +(8) InputIteratorTransformer +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] + +(9) Scan parquet +Output [1]: [o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(10) ProjectExecTransformer +Output [2]: [hash(o_custkey#X, 42) AS hash_partition_key#X, o_custkey#X] +Input [1]: [o_custkey#X] + +(11) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, o_custkey#X] +Arguments: false + +(12) ColumnarExchange +Input [2]: [hash_partition_key#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [o_custkey#X], [plan_id=X], [id=#X] + +(13) ShuffleQueryStage +Output [1]: [o_custkey#X] +Arguments: X + +(14) InputAdapter +Input [1]: [o_custkey#X] + +(15) InputIteratorTransformer +Input [1]: [o_custkey#X] + +(16) ShuffledHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftAnti +Join condition: None + +(17) ProjectExecTransformer +Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] + +(18) FlushableHashAggregateExecTransformer +Input [2]: [cntrycode#X, c_acctbal#X] +Keys [1]: [cntrycode#X] +Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] +Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] +Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(19) ProjectExecTransformer +Output [5]: [hash(cntrycode#X, 42) AS hash_partition_key#X, cntrycode#X, count#X, sum#X, isEmpty#X] +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(20) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: false + +(21) ColumnarExchange +Input [5]: [hash_partition_key#X, cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [cntrycode#X, count#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(22) ShuffleQueryStage +Output [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: X + +(23) InputAdapter +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(24) InputIteratorTransformer +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(25) RegularHashAggregateExecTransformer +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Keys [1]: [cntrycode#X] +Functions [2]: [count(1), sum(c_acctbal#X)] +Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] +Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] + +(26) WholeStageCodegenTransformer (X) +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: false + +(27) ColumnarExchange +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(28) ShuffleQueryStage +Output [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: X + +(29) InputAdapter +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] + +(30) InputIteratorTransformer +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] + +(31) SortExecTransformer +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 + +(32) WholeStageCodegenTransformer (X) +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: false + +(33) VeloxColumnarToRowExec +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] + +(34) Scan parquet +Output [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal)] +ReadSchema: struct + +(35) Filter +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) AND (cast(c_acctbal#X as decimal(16,6)) > Subquery subquery#X, [id=#X])) + +(36) Exchange +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(37) Scan parquet +Output [1]: [o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +ReadSchema: struct + +(38) Exchange +Input [1]: [o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(39) ShuffledHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: LeftAnti +Join condition: None + +(40) Project +Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] + +(41) HashAggregate +Input [2]: [cntrycode#X, c_acctbal#X] +Keys [1]: [cntrycode#X] +Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] +Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] +Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] + +(42) Exchange +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(43) HashAggregate +Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] +Keys [1]: [cntrycode#X] +Functions [2]: [count(1), sum(c_acctbal#X)] +Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] +Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] + +(44) Exchange +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(45) Sort +Input [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 + +(46) AdaptiveSparkPlan +Output [3]: [cntrycode#X, numcust#X, totacctbal#X] +Arguments: isFinalPlan=true + +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (65) ++- == Final Plan == + VeloxColumnarToRowExec (58) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ InputIteratorTransformer (55) + +- ^ InputAdapter (54) + +- ^ ShuffleQueryStage (53), Statistics(X) + +- ColumnarExchange (52) + +- ^ FlushableHashAggregateExecTransformer (50) + +- ^ ProjectExecTransformer (49) + +- ^ FilterExecTransformer (48) + +- ^ Scan parquet (47) ++- == Initial Plan == + HashAggregate (64) + +- Exchange (63) + +- HashAggregate (62) + +- Project (61) + +- Filter (60) + +- Scan parquet (59) + + +(47) Scan parquet +Output [2]: [c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] +ReadSchema: struct + +(48) FilterExecTransformer +Input [2]: [c_phone#X, c_acctbal#X] +Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) + +(49) ProjectExecTransformer +Output [1]: [c_acctbal#X] +Input [2]: [c_phone#X, c_acctbal#X] + +(50) FlushableHashAggregateExecTransformer +Input [1]: [c_acctbal#X] +Keys: [] +Functions [1]: [partial_avg(c_acctbal#X)] +Aggregate Attributes [2]: [sum#X, count#X] +Results [2]: [sum#X, count#X] + +(51) WholeStageCodegenTransformer (X) +Input [2]: [sum#X, count#X] +Arguments: false + +(52) ColumnarExchange +Input [2]: [sum#X, count#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(53) ShuffleQueryStage +Output [2]: [sum#X, count#X] +Arguments: X + +(54) InputAdapter +Input [2]: [sum#X, count#X] + +(55) InputIteratorTransformer +Input [2]: [sum#X, count#X] + +(56) RegularHashAggregateExecTransformer +Input [2]: [sum#X, count#X] +Keys: [] +Functions [1]: [avg(c_acctbal#X)] +Aggregate Attributes [1]: [avg(c_acctbal#X)#X] +Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] + +(57) WholeStageCodegenTransformer (X) +Input [1]: [avg(c_acctbal)#X] +Arguments: false + +(58) VeloxColumnarToRowExec +Input [1]: [avg(c_acctbal)#X] + +(59) Scan parquet +Output [2]: [c_phone#X, c_acctbal#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] +ReadSchema: struct + +(60) Filter +Input [2]: [c_phone#X, c_acctbal#X] +Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) + +(61) Project +Output [1]: [c_acctbal#X] +Input [2]: [c_phone#X, c_acctbal#X] + +(62) HashAggregate +Input [1]: [c_acctbal#X] +Keys: [] +Functions [1]: [partial_avg(c_acctbal#X)] +Aggregate Attributes [2]: [sum#X, count#X] +Results [2]: [sum#X, count#X] + +(63) Exchange +Input [2]: [sum#X, count#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(64) HashAggregate +Input [2]: [sum#X, count#X] +Keys: [] +Functions [1]: [avg(c_acctbal#X)] +Aggregate Attributes [1]: [avg(c_acctbal#X)#X] +Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] + +(65) AdaptiveSparkPlan +Output [1]: [avg(c_acctbal)#X] +Arguments: isFinalPlan=true + +Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] +AdaptiveSparkPlan (65) ++- == Final Plan == + VeloxColumnarToRowExec (58) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ InputIteratorTransformer (55) + +- ^ InputAdapter (54) + +- ^ ShuffleQueryStage (53), Statistics(X) + +- ColumnarExchange (52) + +- ^ FlushableHashAggregateExecTransformer (50) + +- ^ ProjectExecTransformer (49) + +- ^ FilterExecTransformer (48) + +- ^ Scan parquet (47) ++- == Initial Plan == + HashAggregate (64) + +- Exchange (63) + +- HashAggregate (62) + +- Project (61) + +- Filter (60) + +- Scan parquet (59) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt new file mode 100644 index 000000000000..0c75b0257aba --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt @@ -0,0 +1,323 @@ +== Physical Plan == +AdaptiveSparkPlan (59) ++- == Final Plan == + VeloxColumnarToRowExec (39) + +- TakeOrderedAndProjectExecTransformer (38) + +- ^ ProjectExecTransformer (36) + +- ^ RegularHashAggregateExecTransformer (35) + +- ^ RegularHashAggregateExecTransformer (34) + +- ^ ProjectExecTransformer (33) + +- ^ ShuffledHashJoinExecTransformer Inner (32) + :- ^ InputIteratorTransformer (23) + : +- ^ InputAdapter (22) + : +- ^ ShuffleQueryStage (21), Statistics(X) + : +- ColumnarExchange (20) + : +- ^ ProjectExecTransformer (18) + : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : :- ^ InputIteratorTransformer (8) + : : +- ^ InputAdapter (7) + : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : +- ColumnarExchange (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (16) + : +- ^ InputAdapter (15) + : +- ^ ShuffleQueryStage (14), Statistics(X) + : +- ColumnarExchange (13) + : +- ^ ProjectExecTransformer (11) + : +- ^ FilterExecTransformer (10) + : +- ^ Scan parquet (9) + +- ^ InputIteratorTransformer (31) + +- ^ InputAdapter (30) + +- ^ ShuffleQueryStage (29), Statistics(X) + +- ColumnarExchange (28) + +- ^ ProjectExecTransformer (26) + +- ^ FilterExecTransformer (25) + +- ^ Scan parquet (24) ++- == Initial Plan == + TakeOrderedAndProject (58) + +- HashAggregate (57) + +- HashAggregate (56) + +- Project (55) + +- ShuffledHashJoin Inner BuildRight (54) + :- Exchange (49) + : +- Project (48) + : +- ShuffledHashJoin Inner BuildLeft (47) + : :- Exchange (43) + : : +- Project (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Exchange (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Exchange (53) + +- Project (52) + +- Filter (51) + +- Scan parquet (50) + + +(1) Scan parquet +Output [2]: [c_custkey#X, c_mktsegment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [c_custkey#X, c_mktsegment#X] +Arguments: ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) + +(3) ProjectExecTransformer +Output [2]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X] +Input [2]: [c_custkey#X, c_mktsegment#X] + +(4) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, c_custkey#X] +Arguments: false + +(5) ColumnarExchange +Input [2]: [hash_partition_key#X, c_custkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [1]: [c_custkey#X] +Arguments: X + +(7) InputAdapter +Input [1]: [c_custkey#X] + +(8) InputIteratorTransformer +Input [1]: [c_custkey#X] + +(9) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(11) ProjectExecTransformer +Output [5]: [hash(o_custkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] + +(12) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: false + +(13) ColumnarExchange +Input [5]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: X + +(15) InputAdapter +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] + +(16) InputIteratorTransformer +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [4]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_orderdate#X, o_shippriority#X] +Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] + +(19) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: false + +(20) ColumnarExchange +Input [4]: [hash_partition_key#X, o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_orderdate#X, o_shippriority#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: X + +(22) InputAdapter +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] + +(23) InputIteratorTransformer +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] + +(24) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] +ReadSchema: struct + +(25) FilterExecTransformer +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) + +(26) ProjectExecTransformer +Output [4]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(27) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(28) ColumnarExchange +Input [4]: [hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(30) InputAdapter +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(31) InputIteratorTransformer +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] + +(32) ShuffledHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(33) ProjectExecTransformer +Output [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(34) RegularHashAggregateExecTransformer +Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] + +(35) RegularHashAggregateExecTransformer +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(36) ProjectExecTransformer +Output [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] +Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] + +(37) WholeStageCodegenTransformer (X) +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: false + +(38) TakeOrderedAndProjectExecTransformer +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 + +(39) VeloxColumnarToRowExec +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] + +(40) Scan parquet +Output [2]: [c_custkey#X, c_mktsegment#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] +ReadSchema: struct + +(41) Filter +Input [2]: [c_custkey#X, c_mktsegment#X] +Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) + +(42) Project +Output [1]: [c_custkey#X] +Input [2]: [c_custkey#X, c_mktsegment#X] + +(43) Exchange +Input [1]: [c_custkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(44) Scan parquet +Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(45) Filter +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(46) Exchange +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(47) ShuffledHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(48) Project +Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] + +(49) Exchange +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(50) Scan parquet +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] +ReadSchema: struct + +(51) Filter +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) + +(52) Project +Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(53) Exchange +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(54) ShuffledHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(55) Project +Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] + +(56) HashAggregate +Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] + +(57) HashAggregate +Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] +Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] + +(58) TakeOrderedAndProject +Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] + +(59) AdaptiveSparkPlan +Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt new file mode 100644 index 000000000000..ee47fca61115 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt @@ -0,0 +1,268 @@ +== Physical Plan == +AdaptiveSparkPlan (50) ++- == Final Plan == + VeloxColumnarToRowExec (34) + +- ^ SortExecTransformer (32) + +- ^ InputIteratorTransformer (31) + +- ^ InputAdapter (30) + +- ^ ShuffleQueryStage (29), Statistics(X) + +- ColumnarExchange (28) + +- ^ RegularHashAggregateExecTransformer (26) + +- ^ InputIteratorTransformer (25) + +- ^ InputAdapter (24) + +- ^ ShuffleQueryStage (23), Statistics(X) + +- ColumnarExchange (22) + +- ^ ProjectExecTransformer (20) + +- ^ FlushableHashAggregateExecTransformer (19) + +- ^ ProjectExecTransformer (18) + +- ^ ShuffledHashJoinExecTransformer LeftSemi (17) + :- ^ InputIteratorTransformer (8) + : +- ^ InputAdapter (7) + : +- ^ ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (16) + +- ^ InputAdapter (15) + +- ^ ShuffleQueryStage (14), Statistics(X) + +- ColumnarExchange (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) ++- == Initial Plan == + Sort (49) + +- Exchange (48) + +- HashAggregate (47) + +- Exchange (46) + +- HashAggregate (45) + +- Project (44) + +- ShuffledHashJoin LeftSemi BuildRight (43) + :- Exchange (38) + : +- Project (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Exchange (42) + +- Project (41) + +- Filter (40) + +- Scan parquet (39) + + +(1) Scan parquet +Output [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-07-01), LessThan(o_orderdate,1993-10-01)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Arguments: ((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-07-01)) AND (o_orderdate#X < 1993-10-01)) + +(3) ProjectExecTransformer +Output [3]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_orderpriority#X] +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] + +(4) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderkey#X, o_orderpriority#X] +Arguments: false + +(5) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderkey#X, o_orderpriority#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_orderpriority#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: X + +(7) InputAdapter +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(8) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(9) Scan parquet +Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Arguments: ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) + +(11) ProjectExecTransformer +Output [2]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X] +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] + +(12) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, l_orderkey#X] +Arguments: false + +(13) ColumnarExchange +Input [2]: [hash_partition_key#X, l_orderkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [1]: [l_orderkey#X] +Arguments: X + +(15) InputAdapter +Input [1]: [l_orderkey#X] + +(16) InputIteratorTransformer +Input [1]: [l_orderkey#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(18) ProjectExecTransformer +Output [1]: [o_orderpriority#X] +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(19) FlushableHashAggregateExecTransformer +Input [1]: [o_orderpriority#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [o_orderpriority#X, count#X] + +(20) ProjectExecTransformer +Output [3]: [hash(o_orderpriority#X, 42) AS hash_partition_key#X, o_orderpriority#X, count#X] +Input [2]: [o_orderpriority#X, count#X] + +(21) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderpriority#X, count#X] +Arguments: false + +(22) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderpriority#X, count#X] +Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [o_orderpriority#X, count#X], [plan_id=X], [id=#X] + +(23) ShuffleQueryStage +Output [2]: [o_orderpriority#X, count#X] +Arguments: X + +(24) InputAdapter +Input [2]: [o_orderpriority#X, count#X] + +(25) InputIteratorTransformer +Input [2]: [o_orderpriority#X, count#X] + +(26) RegularHashAggregateExecTransformer +Input [2]: [o_orderpriority#X, count#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] + +(27) WholeStageCodegenTransformer (X) +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: false + +(28) ColumnarExchange +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [2]: [o_orderpriority#X, order_count#X] +Arguments: X + +(30) InputAdapter +Input [2]: [o_orderpriority#X, order_count#X] + +(31) InputIteratorTransformer +Input [2]: [o_orderpriority#X, order_count#X] + +(32) SortExecTransformer +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 + +(33) WholeStageCodegenTransformer (X) +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: false + +(34) VeloxColumnarToRowExec +Input [2]: [o_orderpriority#X, order_count#X] + +(35) Scan parquet +Output [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-07-01), LessThan(o_orderdate,1993-10-01)] +ReadSchema: struct + +(36) Filter +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] +Condition : ((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-07-01)) AND (o_orderdate#X < 1993-10-01)) + +(37) Project +Output [2]: [o_orderkey#X, o_orderpriority#X] +Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] + +(38) Exchange +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(39) Scan parquet +Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] +ReadSchema: struct + +(40) Filter +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] +Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) + +(41) Project +Output [1]: [l_orderkey#X] +Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] + +(42) Exchange +Input [1]: [l_orderkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(43) ShuffledHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: LeftSemi +Join condition: None + +(44) Project +Output [1]: [o_orderpriority#X] +Input [2]: [o_orderkey#X, o_orderpriority#X] + +(45) HashAggregate +Input [1]: [o_orderpriority#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [partial_count(1)] +Aggregate Attributes [1]: [count#X] +Results [2]: [o_orderpriority#X, count#X] + +(46) Exchange +Input [2]: [o_orderpriority#X, count#X] +Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(47) HashAggregate +Input [2]: [o_orderpriority#X, count#X] +Keys [1]: [o_orderpriority#X] +Functions [1]: [count(1)] +Aggregate Attributes [1]: [count(1)#X] +Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] + +(48) Exchange +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(49) Sort +Input [2]: [o_orderpriority#X, order_count#X] +Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 + +(50) AdaptiveSparkPlan +Output [2]: [o_orderpriority#X, order_count#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt new file mode 100644 index 000000000000..39a8f483f639 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt @@ -0,0 +1,704 @@ +== Physical Plan == +AdaptiveSparkPlan (134) ++- == Final Plan == + VeloxColumnarToRowExec (94) + +- ^ SortExecTransformer (92) + +- ^ InputIteratorTransformer (91) + +- ^ InputAdapter (90) + +- ^ ShuffleQueryStage (89), Statistics(X) + +- ColumnarExchange (88) + +- ^ RegularHashAggregateExecTransformer (86) + +- ^ InputIteratorTransformer (85) + +- ^ InputAdapter (84) + +- ^ ShuffleQueryStage (83), Statistics(X) + +- ColumnarExchange (82) + +- ^ ProjectExecTransformer (80) + +- ^ FlushableHashAggregateExecTransformer (79) + +- ^ ProjectExecTransformer (78) + +- ^ ShuffledHashJoinExecTransformer Inner (77) + :- ^ InputIteratorTransformer (68) + : +- ^ InputAdapter (67) + : +- ^ ShuffleQueryStage (66), Statistics(X) + : +- ColumnarExchange (65) + : +- ^ ProjectExecTransformer (63) + : +- ^ ShuffledHashJoinExecTransformer Inner (62) + : :- ^ InputIteratorTransformer (53) + : : +- ^ InputAdapter (52) + : : +- ^ ShuffleQueryStage (51), Statistics(X) + : : +- ColumnarExchange (50) + : : +- ^ ProjectExecTransformer (48) + : : +- ^ ShuffledHashJoinExecTransformer Inner (47) + : : :- ^ InputIteratorTransformer (38) + : : : +- ^ InputAdapter (37) + : : : +- ^ ShuffleQueryStage (36), Statistics(X) + : : : +- ColumnarExchange (35) + : : : +- ^ ProjectExecTransformer (33) + : : : +- ^ ShuffledHashJoinExecTransformer Inner (32) + : : : :- ^ InputIteratorTransformer (23) + : : : : +- ^ InputAdapter (22) + : : : : +- ^ ShuffleQueryStage (21), Statistics(X) + : : : : +- ColumnarExchange (20) + : : : : +- ^ ProjectExecTransformer (18) + : : : : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : : : : :- ^ InputIteratorTransformer (8) + : : : : : +- ^ InputAdapter (7) + : : : : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : : : : +- ColumnarExchange (5) + : : : : : +- ^ ProjectExecTransformer (3) + : : : : : +- ^ FilterExecTransformer (2) + : : : : : +- ^ Scan parquet (1) + : : : : +- ^ InputIteratorTransformer (16) + : : : : +- ^ InputAdapter (15) + : : : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : : : +- ColumnarExchange (13) + : : : : +- ^ ProjectExecTransformer (11) + : : : : +- ^ FilterExecTransformer (10) + : : : : +- ^ Scan parquet (9) + : : : +- ^ InputIteratorTransformer (31) + : : : +- ^ InputAdapter (30) + : : : +- ^ ShuffleQueryStage (29), Statistics(X) + : : : +- ColumnarExchange (28) + : : : +- ^ ProjectExecTransformer (26) + : : : +- ^ FilterExecTransformer (25) + : : : +- ^ Scan parquet (24) + : : +- ^ InputIteratorTransformer (46) + : : +- ^ InputAdapter (45) + : : +- ^ ShuffleQueryStage (44), Statistics(X) + : : +- ColumnarExchange (43) + : : +- ^ ProjectExecTransformer (41) + : : +- ^ FilterExecTransformer (40) + : : +- ^ Scan parquet (39) + : +- ^ InputIteratorTransformer (61) + : +- ^ InputAdapter (60) + : +- ^ ShuffleQueryStage (59), Statistics(X) + : +- ColumnarExchange (58) + : +- ^ ProjectExecTransformer (56) + : +- ^ FilterExecTransformer (55) + : +- ^ Scan parquet (54) + +- ^ InputIteratorTransformer (76) + +- ^ InputAdapter (75) + +- ^ ShuffleQueryStage (74), Statistics(X) + +- ColumnarExchange (73) + +- ^ ProjectExecTransformer (71) + +- ^ FilterExecTransformer (70) + +- ^ Scan parquet (69) ++- == Initial Plan == + Sort (133) + +- Exchange (132) + +- HashAggregate (131) + +- Exchange (130) + +- HashAggregate (129) + +- Project (128) + +- ShuffledHashJoin Inner BuildRight (127) + :- Exchange (122) + : +- Project (121) + : +- ShuffledHashJoin Inner BuildRight (120) + : :- Exchange (116) + : : +- Project (115) + : : +- ShuffledHashJoin Inner BuildRight (114) + : : :- Exchange (110) + : : : +- Project (109) + : : : +- ShuffledHashJoin Inner BuildRight (108) + : : : :- Exchange (104) + : : : : +- Project (103) + : : : : +- ShuffledHashJoin Inner BuildLeft (102) + : : : : :- Exchange (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Exchange (101) + : : : : +- Project (100) + : : : : +- Filter (99) + : : : : +- Scan parquet (98) + : : : +- Exchange (107) + : : : +- Filter (106) + : : : +- Scan parquet (105) + : : +- Exchange (113) + : : +- Filter (112) + : : +- Scan parquet (111) + : +- Exchange (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Exchange (126) + +- Project (125) + +- Filter (124) + +- Scan parquet (123) + + +(1) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(3) ProjectExecTransformer +Output [3]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_nationkey#X] +Input [2]: [c_custkey#X, c_nationkey#X] + +(4) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_custkey#X, c_nationkey#X] +Arguments: false + +(5) ColumnarExchange +Input [3]: [hash_partition_key#X, c_custkey#X, c_nationkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_nationkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [2]: [c_custkey#X, c_nationkey#X] +Arguments: X + +(7) InputAdapter +Input [2]: [c_custkey#X, c_nationkey#X] + +(8) InputIteratorTransformer +Input [2]: [c_custkey#X, c_nationkey#X] + +(9) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(11) ProjectExecTransformer +Output [3]: [hash(o_custkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(12) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: false + +(13) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [2]: [o_orderkey#X, o_custkey#X] +Arguments: X + +(15) InputAdapter +Input [2]: [o_orderkey#X, o_custkey#X] + +(16) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_custkey#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [3]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, c_nationkey#X, o_orderkey#X] +Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] + +(19) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_nationkey#X, o_orderkey#X] +Arguments: false + +(20) ColumnarExchange +Input [3]: [hash_partition_key#X, c_nationkey#X, o_orderkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [c_nationkey#X, o_orderkey#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [2]: [c_nationkey#X, o_orderkey#X] +Arguments: X + +(22) InputAdapter +Input [2]: [c_nationkey#X, o_orderkey#X] + +(23) InputIteratorTransformer +Input [2]: [c_nationkey#X, o_orderkey#X] + +(24) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(25) FilterExecTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) + +(26) ProjectExecTransformer +Output [5]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(27) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(28) ColumnarExchange +Input [5]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(30) InputAdapter +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(31) InputIteratorTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(32) ShuffledHashJoinExecTransformer +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(33) ProjectExecTransformer +Output [5]: [hash(l_suppkey#X, c_nationkey#X, 42) AS hash_partition_key#X, c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(34) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(35) ColumnarExchange +Input [5]: [hash_partition_key#X, c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(36) ShuffleQueryStage +Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(37) InputAdapter +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(38) InputIteratorTransformer +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(39) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(40) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(41) ProjectExecTransformer +Output [3]: [hash(s_suppkey#X, s_nationkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Input [2]: [s_suppkey#X, s_nationkey#X] + +(42) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: false + +(43) ColumnarExchange +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_nationkey#X], [plan_id=X], [id=#X] + +(44) ShuffleQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(45) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(46) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(47) ShuffledHashJoinExecTransformer +Left keys [2]: [l_suppkey#X, c_nationkey#X] +Right keys [2]: [s_suppkey#X, s_nationkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(49) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: false + +(50) ColumnarExchange +Input [4]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [l_extendedprice#X, l_discount#X, s_nationkey#X], [plan_id=X], [id=#X] + +(51) ShuffleQueryStage +Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: X + +(52) InputAdapter +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] + +(53) InputIteratorTransformer +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] + +(54) Scan parquet +Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(55) FilterExecTransformer +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(56) ProjectExecTransformer +Output [4]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X, n_name#X, n_regionkey#X] +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] + +(57) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: false + +(58) ColumnarExchange +Input [4]: [hash_partition_key#X, n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X, n_name#X, n_regionkey#X], [plan_id=X], [id=#X] + +(59) ShuffleQueryStage +Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: X + +(60) InputAdapter +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] + +(61) InputIteratorTransformer +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] + +(62) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(63) ProjectExecTransformer +Output [5]: [hash(n_regionkey#X, 42) AS hash_partition_key#X, l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] + +(64) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: false + +(65) ColumnarExchange +Input [5]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X], [plan_id=X], [id=#X] + +(66) ShuffleQueryStage +Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: X + +(67) InputAdapter +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] + +(68) InputIteratorTransformer +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] + +(69) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(70) FilterExecTransformer +Input [2]: [r_regionkey#X, r_name#X] +Arguments: ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) + +(71) ProjectExecTransformer +Output [2]: [hash(r_regionkey#X, 42) AS hash_partition_key#X, r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(72) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, r_regionkey#X] +Arguments: false + +(73) ColumnarExchange +Input [2]: [hash_partition_key#X, r_regionkey#X] +Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [r_regionkey#X], [plan_id=X], [id=#X] + +(74) ShuffleQueryStage +Output [1]: [r_regionkey#X] +Arguments: X + +(75) InputAdapter +Input [1]: [r_regionkey#X] + +(76) InputIteratorTransformer +Input [1]: [r_regionkey#X] + +(77) ShuffledHashJoinExecTransformer +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(78) ProjectExecTransformer +Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] +Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] + +(79) FlushableHashAggregateExecTransformer +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, _pre_X#X] +Keys [1]: [n_name#X] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [n_name#X, sum#X, isEmpty#X] + +(80) ProjectExecTransformer +Output [4]: [hash(n_name#X, 42) AS hash_partition_key#X, n_name#X, sum#X, isEmpty#X] +Input [3]: [n_name#X, sum#X, isEmpty#X] + +(81) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, n_name#X, sum#X, isEmpty#X] +Arguments: false + +(82) ColumnarExchange +Input [4]: [hash_partition_key#X, n_name#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [n_name#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(83) ShuffleQueryStage +Output [3]: [n_name#X, sum#X, isEmpty#X] +Arguments: X + +(84) InputAdapter +Input [3]: [n_name#X, sum#X, isEmpty#X] + +(85) InputIteratorTransformer +Input [3]: [n_name#X, sum#X, isEmpty#X] + +(86) RegularHashAggregateExecTransformer +Input [3]: [n_name#X, sum#X, isEmpty#X] +Keys [1]: [n_name#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(87) WholeStageCodegenTransformer (X) +Input [2]: [n_name#X, revenue#X] +Arguments: false + +(88) ColumnarExchange +Input [2]: [n_name#X, revenue#X] +Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(89) ShuffleQueryStage +Output [2]: [n_name#X, revenue#X] +Arguments: X + +(90) InputAdapter +Input [2]: [n_name#X, revenue#X] + +(91) InputIteratorTransformer +Input [2]: [n_name#X, revenue#X] + +(92) SortExecTransformer +Input [2]: [n_name#X, revenue#X] +Arguments: [revenue#X DESC NULLS LAST], true, 0 + +(93) WholeStageCodegenTransformer (X) +Input [2]: [n_name#X, revenue#X] +Arguments: false + +(94) VeloxColumnarToRowExec +Input [2]: [n_name#X, revenue#X] + +(95) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(96) Filter +Input [2]: [c_custkey#X, c_nationkey#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(97) Exchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(98) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] +ReadSchema: struct + +(99) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) + +(100) Project +Output [2]: [o_orderkey#X, o_custkey#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(101) Exchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(102) ShuffledHashJoin +Left keys [1]: [c_custkey#X] +Right keys [1]: [o_custkey#X] +Join type: Inner +Join condition: None + +(103) Project +Output [2]: [c_nationkey#X, o_orderkey#X] +Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] + +(104) Exchange +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(105) Scan parquet +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] +ReadSchema: struct + +(106) Filter +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) + +(107) Exchange +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(108) ShuffledHashJoin +Left keys [1]: [o_orderkey#X] +Right keys [1]: [l_orderkey#X] +Join type: Inner +Join condition: None + +(109) Project +Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(110) Exchange +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(111) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(112) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(113) Exchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(114) ShuffledHashJoin +Left keys [2]: [l_suppkey#X, c_nationkey#X] +Right keys [2]: [s_suppkey#X, s_nationkey#X] +Join type: Inner +Join condition: None + +(115) Project +Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(116) Exchange +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(117) Scan parquet +Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(118) Filter +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(119) Exchange +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(120) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(121) Project +Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] + +(122) Exchange +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(123) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(124) Filter +Input [2]: [r_regionkey#X, r_name#X] +Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) + +(125) Project +Output [1]: [r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(126) Exchange +Input [1]: [r_regionkey#X] +Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(127) ShuffledHashJoin +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(128) Project +Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] +Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] + +(129) HashAggregate +Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] +Keys [1]: [n_name#X] +Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [3]: [n_name#X, sum#X, isEmpty#X] + +(130) Exchange +Input [3]: [n_name#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(131) HashAggregate +Input [3]: [n_name#X, sum#X, isEmpty#X] +Keys [1]: [n_name#X] +Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] +Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] + +(132) Exchange +Input [2]: [n_name#X, revenue#X] +Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(133) Sort +Input [2]: [n_name#X, revenue#X] +Arguments: [revenue#X DESC NULLS LAST], true, 0 + +(134) AdaptiveSparkPlan +Output [2]: [n_name#X, revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/6.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/6.txt new file mode 100644 index 000000000000..0882ff9e151c --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/6.txt @@ -0,0 +1,112 @@ +== Physical Plan == +AdaptiveSparkPlan (19) ++- == Final Plan == + VeloxColumnarToRowExec (12) + +- ^ RegularHashAggregateExecTransformer (10) + +- ^ InputIteratorTransformer (9) + +- ^ InputAdapter (8) + +- ^ ShuffleQueryStage (7), Statistics(X) + +- ColumnarExchange (6) + +- ^ FlushableHashAggregateExecTransformer (4) + +- ^ ProjectExecTransformer (3) + +- ^ FilterExecTransformer (2) + +- ^ Scan parquet (1) ++- == Initial Plan == + HashAggregate (18) + +- Exchange (17) + +- HashAggregate (16) + +- Project (15) + +- Filter (14) + +- Scan parquet (13) + + +(1) Scan parquet +Output [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), IsNotNull(l_discount), IsNotNull(l_quantity), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), GreaterThanOrEqual(l_discount,0.05), LessThanOrEqual(l_discount,0.07), LessThan(l_quantity,24.00)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: (((((((isnotnull(l_shipdate#X) AND isnotnull(l_discount#X)) AND isnotnull(l_quantity#X)) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND (l_discount#X >= 0.05)) AND (l_discount#X <= 0.07)) AND (l_quantity#X < 24.00)) + +(3) ProjectExecTransformer +Output [3]: [l_extendedprice#X, l_discount#X, (l_extendedprice#X * l_discount#X) AS _pre_X#X] +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(4) FlushableHashAggregateExecTransformer +Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] +Keys: [] +Functions [1]: [partial_sum(_pre_X#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(5) WholeStageCodegenTransformer (X) +Input [2]: [sum#X, isEmpty#X] +Arguments: false + +(6) ColumnarExchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(7) ShuffleQueryStage +Output [2]: [sum#X, isEmpty#X] +Arguments: X + +(8) InputAdapter +Input [2]: [sum#X, isEmpty#X] + +(9) InputIteratorTransformer +Input [2]: [sum#X, isEmpty#X] + +(10) RegularHashAggregateExecTransformer +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * l_discount#X))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * l_discount#X))#X] +Results [1]: [sum((l_extendedprice#X * l_discount#X))#X AS revenue#X] + +(11) WholeStageCodegenTransformer (X) +Input [1]: [revenue#X] +Arguments: false + +(12) VeloxColumnarToRowExec +Input [1]: [revenue#X] + +(13) Scan parquet +Output [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), IsNotNull(l_discount), IsNotNull(l_quantity), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), GreaterThanOrEqual(l_discount,0.05), LessThanOrEqual(l_discount,0.07), LessThan(l_quantity,24.00)] +ReadSchema: struct + +(14) Filter +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : (((((((isnotnull(l_shipdate#X) AND isnotnull(l_discount#X)) AND isnotnull(l_quantity#X)) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND (l_discount#X >= 0.05)) AND (l_discount#X <= 0.07)) AND (l_quantity#X < 24.00)) + +(15) Project +Output [2]: [l_extendedprice#X, l_discount#X] +Input [4]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(16) HashAggregate +Input [2]: [l_extendedprice#X, l_discount#X] +Keys: [] +Functions [1]: [partial_sum((l_extendedprice#X * l_discount#X))] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [2]: [sum#X, isEmpty#X] + +(17) Exchange +Input [2]: [sum#X, isEmpty#X] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] + +(18) HashAggregate +Input [2]: [sum#X, isEmpty#X] +Keys: [] +Functions [1]: [sum((l_extendedprice#X * l_discount#X))] +Aggregate Attributes [1]: [sum((l_extendedprice#X * l_discount#X))#X] +Results [1]: [sum((l_extendedprice#X * l_discount#X))#X AS revenue#X] + +(19) AdaptiveSparkPlan +Output [1]: [revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt new file mode 100644 index 000000000000..29c3f048b7f8 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt @@ -0,0 +1,671 @@ +== Physical Plan == +AdaptiveSparkPlan (128) ++- == Final Plan == + VeloxColumnarToRowExec (90) + +- ^ SortExecTransformer (88) + +- ^ InputIteratorTransformer (87) + +- ^ InputAdapter (86) + +- ^ ShuffleQueryStage (85), Statistics(X) + +- ColumnarExchange (84) + +- ^ RegularHashAggregateExecTransformer (82) + +- ^ InputIteratorTransformer (81) + +- ^ InputAdapter (80) + +- ^ ShuffleQueryStage (79), Statistics(X) + +- ColumnarExchange (78) + +- ^ ProjectExecTransformer (76) + +- ^ FlushableHashAggregateExecTransformer (75) + +- ^ ProjectExecTransformer (74) + +- ^ ShuffledHashJoinExecTransformer Inner (73) + :- ^ InputIteratorTransformer (68) + : +- ^ InputAdapter (67) + : +- ^ ShuffleQueryStage (66), Statistics(X) + : +- ColumnarExchange (65) + : +- ^ ProjectExecTransformer (63) + : +- ^ ShuffledHashJoinExecTransformer Inner (62) + : :- ^ InputIteratorTransformer (53) + : : +- ^ InputAdapter (52) + : : +- ^ ShuffleQueryStage (51), Statistics(X) + : : +- ColumnarExchange (50) + : : +- ^ ProjectExecTransformer (48) + : : +- ^ ShuffledHashJoinExecTransformer Inner (47) + : : :- ^ InputIteratorTransformer (38) + : : : +- ^ InputAdapter (37) + : : : +- ^ ShuffleQueryStage (36), Statistics(X) + : : : +- ColumnarExchange (35) + : : : +- ^ ProjectExecTransformer (33) + : : : +- ^ ShuffledHashJoinExecTransformer Inner (32) + : : : :- ^ InputIteratorTransformer (23) + : : : : +- ^ InputAdapter (22) + : : : : +- ^ ShuffleQueryStage (21), Statistics(X) + : : : : +- ColumnarExchange (20) + : : : : +- ^ ProjectExecTransformer (18) + : : : : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : : : : :- ^ InputIteratorTransformer (8) + : : : : : +- ^ InputAdapter (7) + : : : : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : : : : +- ColumnarExchange (5) + : : : : : +- ^ ProjectExecTransformer (3) + : : : : : +- ^ FilterExecTransformer (2) + : : : : : +- ^ Scan parquet (1) + : : : : +- ^ InputIteratorTransformer (16) + : : : : +- ^ InputAdapter (15) + : : : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : : : +- ColumnarExchange (13) + : : : : +- ^ ProjectExecTransformer (11) + : : : : +- ^ FilterExecTransformer (10) + : : : : +- ^ Scan parquet (9) + : : : +- ^ InputIteratorTransformer (31) + : : : +- ^ InputAdapter (30) + : : : +- ^ ShuffleQueryStage (29), Statistics(X) + : : : +- ColumnarExchange (28) + : : : +- ^ ProjectExecTransformer (26) + : : : +- ^ FilterExecTransformer (25) + : : : +- ^ Scan parquet (24) + : : +- ^ InputIteratorTransformer (46) + : : +- ^ InputAdapter (45) + : : +- ^ ShuffleQueryStage (44), Statistics(X) + : : +- ColumnarExchange (43) + : : +- ^ ProjectExecTransformer (41) + : : +- ^ FilterExecTransformer (40) + : : +- ^ Scan parquet (39) + : +- ^ InputIteratorTransformer (61) + : +- ^ InputAdapter (60) + : +- ^ ShuffleQueryStage (59), Statistics(X) + : +- ColumnarExchange (58) + : +- ^ ProjectExecTransformer (56) + : +- ^ FilterExecTransformer (55) + : +- ^ Scan parquet (54) + +- ^ InputIteratorTransformer (72) + +- ^ InputAdapter (71) + +- ^ ShuffleQueryStage (70), Statistics(X) + +- ReusedExchange (69) ++- == Initial Plan == + Sort (127) + +- Exchange (126) + +- HashAggregate (125) + +- Exchange (124) + +- HashAggregate (123) + +- Project (122) + +- ShuffledHashJoin Inner BuildRight (121) + :- Exchange (117) + : +- Project (116) + : +- ShuffledHashJoin Inner BuildRight (115) + : :- Exchange (111) + : : +- Project (110) + : : +- ShuffledHashJoin Inner BuildRight (109) + : : :- Exchange (105) + : : : +- Project (104) + : : : +- ShuffledHashJoin Inner BuildRight (103) + : : : :- Exchange (99) + : : : : +- Project (98) + : : : : +- ShuffledHashJoin Inner BuildLeft (97) + : : : : :- Exchange (93) + : : : : : +- Filter (92) + : : : : : +- Scan parquet (91) + : : : : +- Exchange (96) + : : : : +- Filter (95) + : : : : +- Scan parquet (94) + : : : +- Exchange (102) + : : : +- Filter (101) + : : : +- Scan parquet (100) + : : +- Exchange (108) + : : +- Filter (107) + : : +- Scan parquet (106) + : +- Exchange (114) + : +- Filter (113) + : +- Scan parquet (112) + +- Exchange (120) + +- Filter (119) + +- Scan parquet (118) + + +(1) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(3) ProjectExecTransformer +Output [3]: [hash(s_suppkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Input [2]: [s_suppkey#X, s_nationkey#X] + +(4) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: false + +(5) ColumnarExchange +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_nationkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(7) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(8) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(9) Scan parquet +Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(11) ProjectExecTransformer +Output [6]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(12) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: false + +(13) ColumnarExchange +Input [6]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: X + +(15) InputAdapter +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(16) InputIteratorTransformer +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [6]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(19) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: false + +(20) ColumnarExchange +Input [6]: [hash_partition_key#X, s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: X + +(22) InputAdapter +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(23) InputIteratorTransformer +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(24) Scan parquet +Output [2]: [o_orderkey#X, o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(25) FilterExecTransformer +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) + +(26) ProjectExecTransformer +Output [3]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X] +Input [2]: [o_orderkey#X, o_custkey#X] + +(27) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: false + +(28) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [2]: [o_orderkey#X, o_custkey#X] +Arguments: X + +(30) InputAdapter +Input [2]: [o_orderkey#X, o_custkey#X] + +(31) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_custkey#X] + +(32) ShuffledHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(33) ProjectExecTransformer +Output [6]: [hash(o_custkey#X, 42) AS hash_partition_key#X, s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] + +(34) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: false + +(35) ColumnarExchange +Input [6]: [hash_partition_key#X, s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X], [plan_id=X], [id=#X] + +(36) ShuffleQueryStage +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: X + +(37) InputAdapter +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] + +(38) InputIteratorTransformer +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] + +(39) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(40) FilterExecTransformer +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(41) ProjectExecTransformer +Output [3]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_nationkey#X] +Input [2]: [c_custkey#X, c_nationkey#X] + +(42) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_custkey#X, c_nationkey#X] +Arguments: false + +(43) ColumnarExchange +Input [3]: [hash_partition_key#X, c_custkey#X, c_nationkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_nationkey#X], [plan_id=X], [id=#X] + +(44) ShuffleQueryStage +Output [2]: [c_custkey#X, c_nationkey#X] +Arguments: X + +(45) InputAdapter +Input [2]: [c_custkey#X, c_nationkey#X] + +(46) InputIteratorTransformer +Input [2]: [c_custkey#X, c_nationkey#X] + +(47) ShuffledHashJoinExecTransformer +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [6]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] + +(49) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: false + +(50) ColumnarExchange +Input [6]: [hash_partition_key#X, s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X], [plan_id=X], [id=#X] + +(51) ShuffleQueryStage +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: X + +(52) InputAdapter +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] + +(53) InputIteratorTransformer +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] + +(54) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] +ReadSchema: struct + +(55) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) + +(56) ProjectExecTransformer +Output [3]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X, n_name#X] +Input [2]: [n_nationkey#X, n_name#X] + +(57) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: false + +(58) ColumnarExchange +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X, n_name#X], [plan_id=X], [id=#X] + +(59) ShuffleQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(60) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(61) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(62) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(63) ProjectExecTransformer +Output [6]: [hash(c_nationkey#X, 42) AS hash_partition_key#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] + +(64) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: false + +(65) ColumnarExchange +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X], [plan_id=X], [id=#X] + +(66) ShuffleQueryStage +Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: X + +(67) InputAdapter +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] + +(68) InputIteratorTransformer +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] + +(69) ReusedExchange [Reuses operator id: 58] +Output [2]: [n_nationkey#X, n_name#X] + +(70) ShuffleQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(71) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(72) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(73) ShuffledHashJoinExecTransformer +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) + +(74) ProjectExecTransformer +Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] +Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] + +(75) FlushableHashAggregateExecTransformer +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [partial_sum(volume#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(76) ProjectExecTransformer +Output [6]: [hash(supp_nation#X, cust_nation#X, l_year#X, 42) AS hash_partition_key#X, supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(77) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: false + +(78) ColumnarExchange +Input [6]: [hash_partition_key#X, supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(79) ShuffleQueryStage +Output [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: X + +(80) InputAdapter +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(81) InputIteratorTransformer +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(82) RegularHashAggregateExecTransformer +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [sum(volume#X)] +Aggregate Attributes [1]: [sum(volume#X)#X] +Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] + +(83) WholeStageCodegenTransformer (X) +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: false + +(84) ColumnarExchange +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(85) ShuffleQueryStage +Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: X + +(86) InputAdapter +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] + +(87) InputIteratorTransformer +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] + +(88) SortExecTransformer +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 + +(89) WholeStageCodegenTransformer (X) +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: false + +(90) VeloxColumnarToRowExec +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] + +(91) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(92) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(93) Exchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(94) Scan parquet +Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(95) Filter +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(96) Exchange +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(97) ShuffledHashJoin +Left keys [1]: [s_suppkey#X] +Right keys [1]: [l_suppkey#X] +Join type: Inner +Join condition: None + +(98) Project +Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] + +(99) Exchange +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(100) Scan parquet +Output [2]: [o_orderkey#X, o_custkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(101) Filter +Input [2]: [o_orderkey#X, o_custkey#X] +Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) + +(102) Exchange +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(103) ShuffledHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(104) Project +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] + +(105) Exchange +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(106) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(107) Filter +Input [2]: [c_custkey#X, c_nationkey#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(108) Exchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(109) ShuffledHashJoin +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(110) Project +Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] + +(111) Exchange +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(112) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] +ReadSchema: struct + +(113) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) + +(114) Exchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(115) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(116) Project +Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] + +(117) Exchange +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(118) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] +ReadSchema: struct + +(119) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) + +(120) Exchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(121) ShuffledHashJoin +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) + +(122) Project +Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] +Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] + +(123) HashAggregate +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [partial_sum(volume#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] + +(124) Exchange +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(125) HashAggregate +Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] +Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] +Functions [1]: [sum(volume#X)] +Aggregate Attributes [1]: [sum(volume#X)#X] +Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] + +(126) Exchange +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(127) Sort +Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 + +(128) AdaptiveSparkPlan +Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt new file mode 100644 index 000000000000..9c24e9ec5a01 --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt @@ -0,0 +1,927 @@ +== Physical Plan == +AdaptiveSparkPlan (177) ++- == Final Plan == + VeloxColumnarToRowExec (125) + +- ^ SortExecTransformer (123) + +- ^ InputIteratorTransformer (122) + +- ^ InputAdapter (121) + +- ^ ShuffleQueryStage (120), Statistics(X) + +- ColumnarExchange (119) + +- ^ ProjectExecTransformer (117) + +- ^ RegularHashAggregateExecTransformer (116) + +- ^ InputIteratorTransformer (115) + +- ^ InputAdapter (114) + +- ^ ShuffleQueryStage (113), Statistics(X) + +- ColumnarExchange (112) + +- ^ ProjectExecTransformer (110) + +- ^ FlushableHashAggregateExecTransformer (109) + +- ^ ProjectExecTransformer (108) + +- ^ ShuffledHashJoinExecTransformer Inner (107) + :- ^ InputIteratorTransformer (98) + : +- ^ InputAdapter (97) + : +- ^ ShuffleQueryStage (96), Statistics(X) + : +- ColumnarExchange (95) + : +- ^ ProjectExecTransformer (93) + : +- ^ ShuffledHashJoinExecTransformer Inner (92) + : :- ^ InputIteratorTransformer (83) + : : +- ^ InputAdapter (82) + : : +- ^ ShuffleQueryStage (81), Statistics(X) + : : +- ColumnarExchange (80) + : : +- ^ ProjectExecTransformer (78) + : : +- ^ ShuffledHashJoinExecTransformer Inner (77) + : : :- ^ InputIteratorTransformer (68) + : : : +- ^ InputAdapter (67) + : : : +- ^ ShuffleQueryStage (66), Statistics(X) + : : : +- ColumnarExchange (65) + : : : +- ^ ProjectExecTransformer (63) + : : : +- ^ ShuffledHashJoinExecTransformer Inner (62) + : : : :- ^ InputIteratorTransformer (53) + : : : : +- ^ InputAdapter (52) + : : : : +- ^ ShuffleQueryStage (51), Statistics(X) + : : : : +- ColumnarExchange (50) + : : : : +- ^ ProjectExecTransformer (48) + : : : : +- ^ ShuffledHashJoinExecTransformer Inner (47) + : : : : :- ^ InputIteratorTransformer (38) + : : : : : +- ^ InputAdapter (37) + : : : : : +- ^ ShuffleQueryStage (36), Statistics(X) + : : : : : +- ColumnarExchange (35) + : : : : : +- ^ ProjectExecTransformer (33) + : : : : : +- ^ ShuffledHashJoinExecTransformer Inner (32) + : : : : : :- ^ InputIteratorTransformer (23) + : : : : : : +- ^ InputAdapter (22) + : : : : : : +- ^ ShuffleQueryStage (21), Statistics(X) + : : : : : : +- ColumnarExchange (20) + : : : : : : +- ^ ProjectExecTransformer (18) + : : : : : : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : : : : : : :- ^ InputIteratorTransformer (8) + : : : : : : : +- ^ InputAdapter (7) + : : : : : : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : : : : : : +- ColumnarExchange (5) + : : : : : : : +- ^ ProjectExecTransformer (3) + : : : : : : : +- ^ FilterExecTransformer (2) + : : : : : : : +- ^ Scan parquet (1) + : : : : : : +- ^ InputIteratorTransformer (16) + : : : : : : +- ^ InputAdapter (15) + : : : : : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : : : : : +- ColumnarExchange (13) + : : : : : : +- ^ ProjectExecTransformer (11) + : : : : : : +- ^ FilterExecTransformer (10) + : : : : : : +- ^ Scan parquet (9) + : : : : : +- ^ InputIteratorTransformer (31) + : : : : : +- ^ InputAdapter (30) + : : : : : +- ^ ShuffleQueryStage (29), Statistics(X) + : : : : : +- ColumnarExchange (28) + : : : : : +- ^ ProjectExecTransformer (26) + : : : : : +- ^ FilterExecTransformer (25) + : : : : : +- ^ Scan parquet (24) + : : : : +- ^ InputIteratorTransformer (46) + : : : : +- ^ InputAdapter (45) + : : : : +- ^ ShuffleQueryStage (44), Statistics(X) + : : : : +- ColumnarExchange (43) + : : : : +- ^ ProjectExecTransformer (41) + : : : : +- ^ FilterExecTransformer (40) + : : : : +- ^ Scan parquet (39) + : : : +- ^ InputIteratorTransformer (61) + : : : +- ^ InputAdapter (60) + : : : +- ^ ShuffleQueryStage (59), Statistics(X) + : : : +- ColumnarExchange (58) + : : : +- ^ ProjectExecTransformer (56) + : : : +- ^ FilterExecTransformer (55) + : : : +- ^ Scan parquet (54) + : : +- ^ InputIteratorTransformer (76) + : : +- ^ InputAdapter (75) + : : +- ^ ShuffleQueryStage (74), Statistics(X) + : : +- ColumnarExchange (73) + : : +- ^ ProjectExecTransformer (71) + : : +- ^ FilterExecTransformer (70) + : : +- ^ Scan parquet (69) + : +- ^ InputIteratorTransformer (91) + : +- ^ InputAdapter (90) + : +- ^ ShuffleQueryStage (89), Statistics(X) + : +- ColumnarExchange (88) + : +- ^ ProjectExecTransformer (86) + : +- ^ FilterExecTransformer (85) + : +- ^ Scan parquet (84) + +- ^ InputIteratorTransformer (106) + +- ^ InputAdapter (105) + +- ^ ShuffleQueryStage (104), Statistics(X) + +- ColumnarExchange (103) + +- ^ ProjectExecTransformer (101) + +- ^ FilterExecTransformer (100) + +- ^ Scan parquet (99) ++- == Initial Plan == + Sort (176) + +- Exchange (175) + +- HashAggregate (174) + +- Exchange (173) + +- HashAggregate (172) + +- Project (171) + +- ShuffledHashJoin Inner BuildRight (170) + :- Exchange (165) + : +- Project (164) + : +- ShuffledHashJoin Inner BuildRight (163) + : :- Exchange (159) + : : +- Project (158) + : : +- ShuffledHashJoin Inner BuildRight (157) + : : :- Exchange (153) + : : : +- Project (152) + : : : +- ShuffledHashJoin Inner BuildRight (151) + : : : :- Exchange (147) + : : : : +- Project (146) + : : : : +- ShuffledHashJoin Inner BuildRight (145) + : : : : :- Exchange (141) + : : : : : +- Project (140) + : : : : : +- ShuffledHashJoin Inner BuildRight (139) + : : : : : :- Exchange (135) + : : : : : : +- Project (134) + : : : : : : +- ShuffledHashJoin Inner BuildLeft (133) + : : : : : : :- Exchange (129) + : : : : : : : +- Project (128) + : : : : : : : +- Filter (127) + : : : : : : : +- Scan parquet (126) + : : : : : : +- Exchange (132) + : : : : : : +- Filter (131) + : : : : : : +- Scan parquet (130) + : : : : : +- Exchange (138) + : : : : : +- Filter (137) + : : : : : +- Scan parquet (136) + : : : : +- Exchange (144) + : : : : +- Filter (143) + : : : : +- Scan parquet (142) + : : : +- Exchange (150) + : : : +- Filter (149) + : : : +- Scan parquet (148) + : : +- Exchange (156) + : : +- Filter (155) + : : +- Scan parquet (154) + : +- Exchange (162) + : +- Filter (161) + : +- Scan parquet (160) + +- Exchange (169) + +- Project (168) + +- Filter (167) + +- Scan parquet (166) + + +(1) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_type), EqualTo(p_type,ECONOMY ANODIZED STEEL), IsNotNull(p_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [p_partkey#X, p_type#X] +Arguments: ((isnotnull(p_type#X) AND (p_type#X = ECONOMY ANODIZED STEEL)) AND isnotnull(p_partkey#X)) + +(3) ProjectExecTransformer +Output [2]: [hash(p_partkey#X, 42) AS hash_partition_key#X, p_partkey#X] +Input [2]: [p_partkey#X, p_type#X] + +(4) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: false + +(5) ColumnarExchange +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [p_partkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(7) InputAdapter +Input [1]: [p_partkey#X] + +(8) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(9) Scan parquet +Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(11) ProjectExecTransformer +Output [6]: [hash(l_partkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(12) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(13) ColumnarExchange +Input [6]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(15) InputAdapter +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(16) InputIteratorTransformer +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [5]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(19) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(20) ColumnarExchange +Input [5]: [hash_partition_key#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(22) InputAdapter +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(23) InputIteratorTransformer +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(24) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(25) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(26) ProjectExecTransformer +Output [3]: [hash(s_suppkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Input [2]: [s_suppkey#X, s_nationkey#X] + +(27) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: false + +(28) ColumnarExchange +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_nationkey#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(30) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(31) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(32) ShuffledHashJoinExecTransformer +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(33) ProjectExecTransformer +Output [5]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(34) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: false + +(35) ColumnarExchange +Input [5]: [hash_partition_key#X, l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X], [plan_id=X], [id=#X] + +(36) ShuffleQueryStage +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: X + +(37) InputAdapter +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] + +(38) InputIteratorTransformer +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] + +(39) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(40) FilterExecTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) + +(41) ProjectExecTransformer +Output [4]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_custkey#X, o_orderdate#X] +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(42) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: false + +(43) ColumnarExchange +Input [4]: [hash_partition_key#X, o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_custkey#X, o_orderdate#X], [plan_id=X], [id=#X] + +(44) ShuffleQueryStage +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: X + +(45) InputAdapter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(46) InputIteratorTransformer +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] + +(47) ShuffledHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [6]: [hash(o_custkey#X, 42) AS hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] + +(49) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: false + +(50) ColumnarExchange +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X], [plan_id=X], [id=#X] + +(51) ShuffleQueryStage +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: X + +(52) InputAdapter +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] + +(53) InputIteratorTransformer +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] + +(54) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(55) FilterExecTransformer +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(56) ProjectExecTransformer +Output [3]: [hash(c_custkey#X, 42) AS hash_partition_key#X, c_custkey#X, c_nationkey#X] +Input [2]: [c_custkey#X, c_nationkey#X] + +(57) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, c_custkey#X, c_nationkey#X] +Arguments: false + +(58) ColumnarExchange +Input [3]: [hash_partition_key#X, c_custkey#X, c_nationkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [c_custkey#X, c_nationkey#X], [plan_id=X], [id=#X] + +(59) ShuffleQueryStage +Output [2]: [c_custkey#X, c_nationkey#X] +Arguments: X + +(60) InputAdapter +Input [2]: [c_custkey#X, c_nationkey#X] + +(61) InputIteratorTransformer +Input [2]: [c_custkey#X, c_nationkey#X] + +(62) ShuffledHashJoinExecTransformer +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(63) ProjectExecTransformer +Output [6]: [hash(c_nationkey#X, 42) AS hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] + +(64) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: false + +(65) ColumnarExchange +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X], [plan_id=X], [id=#X] + +(66) ShuffleQueryStage +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: X + +(67) InputAdapter +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] + +(68) InputIteratorTransformer +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] + +(69) Scan parquet +Output [2]: [n_nationkey#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(70) FilterExecTransformer +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(71) ProjectExecTransformer +Output [3]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X, n_regionkey#X] +Input [2]: [n_nationkey#X, n_regionkey#X] + +(72) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, n_nationkey#X, n_regionkey#X] +Arguments: false + +(73) ColumnarExchange +Input [3]: [hash_partition_key#X, n_nationkey#X, n_regionkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X, n_regionkey#X], [plan_id=X], [id=#X] + +(74) ShuffleQueryStage +Output [2]: [n_nationkey#X, n_regionkey#X] +Arguments: X + +(75) InputAdapter +Input [2]: [n_nationkey#X, n_regionkey#X] + +(76) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_regionkey#X] + +(77) ShuffledHashJoinExecTransformer +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(78) ProjectExecTransformer +Output [6]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] + +(79) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: false + +(80) ColumnarExchange +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X], [plan_id=X], [id=#X] + +(81) ShuffleQueryStage +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: X + +(82) InputAdapter +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] + +(83) InputIteratorTransformer +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] + +(84) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(85) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: isnotnull(n_nationkey#X) + +(86) ProjectExecTransformer +Output [3]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X, n_name#X] +Input [2]: [n_nationkey#X, n_name#X] + +(87) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: false + +(88) ColumnarExchange +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X, n_name#X], [plan_id=X], [id=#X] + +(89) ShuffleQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(90) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(91) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(92) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(93) ProjectExecTransformer +Output [6]: [hash(n_regionkey#X, 42) AS hash_partition_key#X, l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] + +(94) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: false + +(95) ColumnarExchange +Input [6]: [hash_partition_key#X, l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X], [plan_id=X], [id=#X] + +(96) ShuffleQueryStage +Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: X + +(97) InputAdapter +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] + +(98) InputIteratorTransformer +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] + +(99) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(100) FilterExecTransformer +Input [2]: [r_regionkey#X, r_name#X] +Arguments: ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) + +(101) ProjectExecTransformer +Output [2]: [hash(r_regionkey#X, 42) AS hash_partition_key#X, r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(102) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, r_regionkey#X] +Arguments: false + +(103) ColumnarExchange +Input [2]: [hash_partition_key#X, r_regionkey#X] +Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [r_regionkey#X], [plan_id=X], [id=#X] + +(104) ShuffleQueryStage +Output [1]: [r_regionkey#X] +Arguments: X + +(105) InputAdapter +Input [1]: [r_regionkey#X] + +(106) InputIteratorTransformer +Input [1]: [r_regionkey#X] + +(107) ShuffledHashJoinExecTransformer +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(108) ProjectExecTransformer +Output [4]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X, CASE WHEN (n_name#X = BRAZIL) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END AS _pre_X#X] +Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] + +(109) FlushableHashAggregateExecTransformer +Input [4]: [o_year#X, volume#X, nation#X, _pre_X#X] +Keys [1]: [o_year#X] +Functions [2]: [partial_sum(_pre_X#X), partial_sum(volume#X)] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(110) ProjectExecTransformer +Output [6]: [hash(o_year#X, 42) AS hash_partition_key#X, o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(111) WholeStageCodegenTransformer (X) +Input [6]: [hash_partition_key#X, o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: false + +(112) ColumnarExchange +Input [6]: [hash_partition_key#X, o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(113) ShuffleQueryStage +Output [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: X + +(114) InputAdapter +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(115) InputIteratorTransformer +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(116) RegularHashAggregateExecTransformer +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys [1]: [o_year#X] +Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] +Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] +Results [3]: [o_year#X, sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] + +(117) ProjectExecTransformer +Output [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] +Input [3]: [o_year#X, sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] + +(118) WholeStageCodegenTransformer (X) +Input [2]: [o_year#X, mkt_share#X] +Arguments: false + +(119) ColumnarExchange +Input [2]: [o_year#X, mkt_share#X] +Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(120) ShuffleQueryStage +Output [2]: [o_year#X, mkt_share#X] +Arguments: X + +(121) InputAdapter +Input [2]: [o_year#X, mkt_share#X] + +(122) InputIteratorTransformer +Input [2]: [o_year#X, mkt_share#X] + +(123) SortExecTransformer +Input [2]: [o_year#X, mkt_share#X] +Arguments: [o_year#X ASC NULLS FIRST], true, 0 + +(124) WholeStageCodegenTransformer (X) +Input [2]: [o_year#X, mkt_share#X] +Arguments: false + +(125) VeloxColumnarToRowExec +Input [2]: [o_year#X, mkt_share#X] + +(126) Scan parquet +Output [2]: [p_partkey#X, p_type#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_type), EqualTo(p_type,ECONOMY ANODIZED STEEL), IsNotNull(p_partkey)] +ReadSchema: struct + +(127) Filter +Input [2]: [p_partkey#X, p_type#X] +Condition : ((isnotnull(p_type#X) AND (p_type#X = ECONOMY ANODIZED STEEL)) AND isnotnull(p_partkey#X)) + +(128) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_type#X] + +(129) Exchange +Input [1]: [p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(130) Scan parquet +Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(131) Filter +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(132) Exchange +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(133) ShuffledHashJoin +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(134) Project +Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] + +(135) Exchange +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(136) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(137) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(138) Exchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(139) ShuffledHashJoin +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(140) Project +Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(141) Exchange +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(142) Scan parquet +Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] +ReadSchema: struct + +(143) Filter +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) + +(144) Exchange +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(145) ShuffledHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(146) Project +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] + +(147) Exchange +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(148) Scan parquet +Output [2]: [c_custkey#X, c_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] +ReadSchema: struct + +(149) Filter +Input [2]: [c_custkey#X, c_nationkey#X] +Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) + +(150) Exchange +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(151) ShuffledHashJoin +Left keys [1]: [o_custkey#X] +Right keys [1]: [c_custkey#X] +Join type: Inner +Join condition: None + +(152) Project +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] + +(153) Exchange +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(154) Scan parquet +Output [2]: [n_nationkey#X, n_regionkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] +ReadSchema: struct + +(155) Filter +Input [2]: [n_nationkey#X, n_regionkey#X] +Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) + +(156) Exchange +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(157) ShuffledHashJoin +Left keys [1]: [c_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(158) Project +Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] + +(159) Exchange +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(160) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(161) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : isnotnull(n_nationkey#X) + +(162) Exchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(163) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(164) Project +Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] + +(165) Exchange +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(166) Scan parquet +Output [2]: [r_regionkey#X, r_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] +ReadSchema: struct + +(167) Filter +Input [2]: [r_regionkey#X, r_name#X] +Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) + +(168) Project +Output [1]: [r_regionkey#X] +Input [2]: [r_regionkey#X, r_name#X] + +(169) Exchange +Input [1]: [r_regionkey#X] +Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(170) ShuffledHashJoin +Left keys [1]: [n_regionkey#X] +Right keys [1]: [r_regionkey#X] +Join type: Inner +Join condition: None + +(171) Project +Output [3]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X] +Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] + +(172) HashAggregate +Input [3]: [o_year#X, volume#X, nation#X] +Keys [1]: [o_year#X] +Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] +Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] +Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] + +(173) Exchange +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(174) HashAggregate +Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] +Keys [1]: [o_year#X] +Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] +Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] +Results [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] + +(175) Exchange +Input [2]: [o_year#X, mkt_share#X] +Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(176) Sort +Input [2]: [o_year#X, mkt_share#X] +Arguments: [o_year#X ASC NULLS FIRST], true, 0 + +(177) AdaptiveSparkPlan +Output [2]: [o_year#X, mkt_share#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt new file mode 100644 index 000000000000..aef854c3549e --- /dev/null +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt @@ -0,0 +1,699 @@ +== Physical Plan == +AdaptiveSparkPlan (133) ++- == Final Plan == + VeloxColumnarToRowExec (94) + +- ^ SortExecTransformer (92) + +- ^ InputIteratorTransformer (91) + +- ^ InputAdapter (90) + +- ^ ShuffleQueryStage (89), Statistics(X) + +- ColumnarExchange (88) + +- ^ RegularHashAggregateExecTransformer (86) + +- ^ InputIteratorTransformer (85) + +- ^ InputAdapter (84) + +- ^ ShuffleQueryStage (83), Statistics(X) + +- ColumnarExchange (82) + +- ^ ProjectExecTransformer (80) + +- ^ FlushableHashAggregateExecTransformer (79) + +- ^ ProjectExecTransformer (78) + +- ^ ShuffledHashJoinExecTransformer Inner (77) + :- ^ InputIteratorTransformer (68) + : +- ^ InputAdapter (67) + : +- ^ ShuffleQueryStage (66), Statistics(X) + : +- ColumnarExchange (65) + : +- ^ ProjectExecTransformer (63) + : +- ^ ShuffledHashJoinExecTransformer Inner (62) + : :- ^ InputIteratorTransformer (53) + : : +- ^ InputAdapter (52) + : : +- ^ ShuffleQueryStage (51), Statistics(X) + : : +- ColumnarExchange (50) + : : +- ^ ProjectExecTransformer (48) + : : +- ^ ShuffledHashJoinExecTransformer Inner (47) + : : :- ^ InputIteratorTransformer (38) + : : : +- ^ InputAdapter (37) + : : : +- ^ ShuffleQueryStage (36), Statistics(X) + : : : +- ColumnarExchange (35) + : : : +- ^ ProjectExecTransformer (33) + : : : +- ^ ShuffledHashJoinExecTransformer Inner (32) + : : : :- ^ InputIteratorTransformer (23) + : : : : +- ^ InputAdapter (22) + : : : : +- ^ ShuffleQueryStage (21), Statistics(X) + : : : : +- ColumnarExchange (20) + : : : : +- ^ ProjectExecTransformer (18) + : : : : +- ^ ShuffledHashJoinExecTransformer Inner (17) + : : : : :- ^ InputIteratorTransformer (8) + : : : : : +- ^ InputAdapter (7) + : : : : : +- ^ ShuffleQueryStage (6), Statistics(X) + : : : : : +- ColumnarExchange (5) + : : : : : +- ^ ProjectExecTransformer (3) + : : : : : +- ^ FilterExecTransformer (2) + : : : : : +- ^ Scan parquet (1) + : : : : +- ^ InputIteratorTransformer (16) + : : : : +- ^ InputAdapter (15) + : : : : +- ^ ShuffleQueryStage (14), Statistics(X) + : : : : +- ColumnarExchange (13) + : : : : +- ^ ProjectExecTransformer (11) + : : : : +- ^ FilterExecTransformer (10) + : : : : +- ^ Scan parquet (9) + : : : +- ^ InputIteratorTransformer (31) + : : : +- ^ InputAdapter (30) + : : : +- ^ ShuffleQueryStage (29), Statistics(X) + : : : +- ColumnarExchange (28) + : : : +- ^ ProjectExecTransformer (26) + : : : +- ^ FilterExecTransformer (25) + : : : +- ^ Scan parquet (24) + : : +- ^ InputIteratorTransformer (46) + : : +- ^ InputAdapter (45) + : : +- ^ ShuffleQueryStage (44), Statistics(X) + : : +- ColumnarExchange (43) + : : +- ^ ProjectExecTransformer (41) + : : +- ^ FilterExecTransformer (40) + : : +- ^ Scan parquet (39) + : +- ^ InputIteratorTransformer (61) + : +- ^ InputAdapter (60) + : +- ^ ShuffleQueryStage (59), Statistics(X) + : +- ColumnarExchange (58) + : +- ^ ProjectExecTransformer (56) + : +- ^ FilterExecTransformer (55) + : +- ^ Scan parquet (54) + +- ^ InputIteratorTransformer (76) + +- ^ InputAdapter (75) + +- ^ ShuffleQueryStage (74), Statistics(X) + +- ColumnarExchange (73) + +- ^ ProjectExecTransformer (71) + +- ^ FilterExecTransformer (70) + +- ^ Scan parquet (69) ++- == Initial Plan == + Sort (132) + +- Exchange (131) + +- HashAggregate (130) + +- Exchange (129) + +- HashAggregate (128) + +- Project (127) + +- ShuffledHashJoin Inner BuildRight (126) + :- Exchange (122) + : +- Project (121) + : +- ShuffledHashJoin Inner BuildRight (120) + : :- Exchange (116) + : : +- Project (115) + : : +- ShuffledHashJoin Inner BuildRight (114) + : : :- Exchange (110) + : : : +- Project (109) + : : : +- ShuffledHashJoin Inner BuildRight (108) + : : : :- Exchange (104) + : : : : +- Project (103) + : : : : +- ShuffledHashJoin Inner BuildLeft (102) + : : : : :- Exchange (98) + : : : : : +- Project (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Exchange (101) + : : : : +- Filter (100) + : : : : +- Scan parquet (99) + : : : +- Exchange (107) + : : : +- Filter (106) + : : : +- Scan parquet (105) + : : +- Exchange (113) + : : +- Filter (112) + : : +- Scan parquet (111) + : +- Exchange (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Exchange (125) + +- Filter (124) + +- Scan parquet (123) + + +(1) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringContains(p_name,green), IsNotNull(p_partkey)] +ReadSchema: struct + +(2) FilterExecTransformer +Input [2]: [p_partkey#X, p_name#X] +Arguments: ((isnotnull(p_name#X) AND Contains(p_name#X, green)) AND isnotnull(p_partkey#X)) + +(3) ProjectExecTransformer +Output [2]: [hash(p_partkey#X, 42) AS hash_partition_key#X, p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(4) WholeStageCodegenTransformer (X) +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: false + +(5) ColumnarExchange +Input [2]: [hash_partition_key#X, p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [p_partkey#X], [plan_id=X], [id=#X] + +(6) ShuffleQueryStage +Output [1]: [p_partkey#X] +Arguments: X + +(7) InputAdapter +Input [1]: [p_partkey#X] + +(8) InputIteratorTransformer +Input [1]: [p_partkey#X] + +(9) Scan parquet +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(10) FilterExecTransformer +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(11) ProjectExecTransformer +Output [7]: [hash(l_partkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(12) WholeStageCodegenTransformer (X) +Input [7]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(13) ColumnarExchange +Input [7]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(14) ShuffleQueryStage +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(15) InputAdapter +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(16) InputIteratorTransformer +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(17) ShuffledHashJoinExecTransformer +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(18) ProjectExecTransformer +Output [7]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(19) WholeStageCodegenTransformer (X) +Input [7]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: false + +(20) ColumnarExchange +Input [7]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X], [plan_id=X], [id=#X] + +(21) ShuffleQueryStage +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: X + +(22) InputAdapter +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(23) InputIteratorTransformer +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(24) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(25) FilterExecTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(26) ProjectExecTransformer +Output [3]: [hash(s_suppkey#X, 42) AS hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Input [2]: [s_suppkey#X, s_nationkey#X] + +(27) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: false + +(28) ColumnarExchange +Input [3]: [hash_partition_key#X, s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [s_suppkey#X, s_nationkey#X], [plan_id=X], [id=#X] + +(29) ShuffleQueryStage +Output [2]: [s_suppkey#X, s_nationkey#X] +Arguments: X + +(30) InputAdapter +Input [2]: [s_suppkey#X, s_nationkey#X] + +(31) InputIteratorTransformer +Input [2]: [s_suppkey#X, s_nationkey#X] + +(32) ShuffledHashJoinExecTransformer +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(33) ProjectExecTransformer +Output [8]: [hash(l_suppkey#X, l_partkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(34) WholeStageCodegenTransformer (X) +Input [8]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: false + +(35) ColumnarExchange +Input [8]: [hash_partition_key#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X], [plan_id=X], [id=#X] + +(36) ShuffleQueryStage +Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: X + +(37) InputAdapter +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] + +(38) InputIteratorTransformer +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] + +(39) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] +ReadSchema: struct + +(40) FilterExecTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) + +(41) ProjectExecTransformer +Output [4]: [hash(ps_suppkey#X, ps_partkey#X, 42) AS hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(42) WholeStageCodegenTransformer (X) +Input [4]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: false + +(43) ColumnarExchange +Input [4]: [hash_partition_key#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [ps_partkey#X, ps_suppkey#X, ps_supplycost#X], [plan_id=X], [id=#X] + +(44) ShuffleQueryStage +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: X + +(45) InputAdapter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(46) InputIteratorTransformer +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(47) ShuffledHashJoinExecTransformer +Left keys [2]: [l_suppkey#X, l_partkey#X] +Right keys [2]: [ps_suppkey#X, ps_partkey#X] +Join type: Inner +Join condition: None + +(48) ProjectExecTransformer +Output [7]: [hash(l_orderkey#X, 42) AS hash_partition_key#X, l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(49) WholeStageCodegenTransformer (X) +Input [7]: [hash_partition_key#X, l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: false + +(50) ColumnarExchange +Input [7]: [hash_partition_key#X, l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X], [plan_id=X], [id=#X] + +(51) ShuffleQueryStage +Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: X + +(52) InputAdapter +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] + +(53) InputIteratorTransformer +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] + +(54) Scan parquet +Output [2]: [o_orderkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(55) FilterExecTransformer +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: isnotnull(o_orderkey#X) + +(56) ProjectExecTransformer +Output [3]: [hash(o_orderkey#X, 42) AS hash_partition_key#X, o_orderkey#X, o_orderdate#X] +Input [2]: [o_orderkey#X, o_orderdate#X] + +(57) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, o_orderkey#X, o_orderdate#X] +Arguments: false + +(58) ColumnarExchange +Input [3]: [hash_partition_key#X, o_orderkey#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [o_orderkey#X, o_orderdate#X], [plan_id=X], [id=#X] + +(59) ShuffleQueryStage +Output [2]: [o_orderkey#X, o_orderdate#X] +Arguments: X + +(60) InputAdapter +Input [2]: [o_orderkey#X, o_orderdate#X] + +(61) InputIteratorTransformer +Input [2]: [o_orderkey#X, o_orderdate#X] + +(62) ShuffledHashJoinExecTransformer +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(63) ProjectExecTransformer +Output [7]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] + +(64) WholeStageCodegenTransformer (X) +Input [7]: [hash_partition_key#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: false + +(65) ColumnarExchange +Input [7]: [hash_partition_key#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X], [plan_id=X], [id=#X] + +(66) ShuffleQueryStage +Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: X + +(67) InputAdapter +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] + +(68) InputIteratorTransformer +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] + +(69) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(70) FilterExecTransformer +Input [2]: [n_nationkey#X, n_name#X] +Arguments: isnotnull(n_nationkey#X) + +(71) ProjectExecTransformer +Output [3]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X, n_name#X] +Input [2]: [n_nationkey#X, n_name#X] + +(72) WholeStageCodegenTransformer (X) +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: false + +(73) ColumnarExchange +Input [3]: [hash_partition_key#X, n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X, n_name#X], [plan_id=X], [id=#X] + +(74) ShuffleQueryStage +Output [2]: [n_nationkey#X, n_name#X] +Arguments: X + +(75) InputAdapter +Input [2]: [n_nationkey#X, n_name#X] + +(76) InputIteratorTransformer +Input [2]: [n_nationkey#X, n_name#X] + +(77) ShuffledHashJoinExecTransformer +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(78) ProjectExecTransformer +Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] + +(79) FlushableHashAggregateExecTransformer +Input [3]: [nation#X, o_year#X, amount#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [partial_sum(amount#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(80) ProjectExecTransformer +Output [5]: [hash(nation#X, o_year#X, 42) AS hash_partition_key#X, nation#X, o_year#X, sum#X, isEmpty#X] +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(81) WholeStageCodegenTransformer (X) +Input [5]: [hash_partition_key#X, nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: false + +(82) ColumnarExchange +Input [5]: [hash_partition_key#X, nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [nation#X, o_year#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] + +(83) ShuffleQueryStage +Output [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: X + +(84) InputAdapter +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(85) InputIteratorTransformer +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(86) RegularHashAggregateExecTransformer +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [sum(amount#X)] +Aggregate Attributes [1]: [sum(amount#X)#X] +Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] + +(87) WholeStageCodegenTransformer (X) +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: false + +(88) ColumnarExchange +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] + +(89) ShuffleQueryStage +Output [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: X + +(90) InputAdapter +Input [3]: [nation#X, o_year#X, sum_profit#X] + +(91) InputIteratorTransformer +Input [3]: [nation#X, o_year#X, sum_profit#X] + +(92) SortExecTransformer +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 + +(93) WholeStageCodegenTransformer (X) +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: false + +(94) VeloxColumnarToRowExec +Input [3]: [nation#X, o_year#X, sum_profit#X] + +(95) Scan parquet +Output [2]: [p_partkey#X, p_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(p_name), StringContains(p_name,green), IsNotNull(p_partkey)] +ReadSchema: struct + +(96) Filter +Input [2]: [p_partkey#X, p_name#X] +Condition : ((isnotnull(p_name#X) AND Contains(p_name#X, green)) AND isnotnull(p_partkey#X)) + +(97) Project +Output [1]: [p_partkey#X] +Input [2]: [p_partkey#X, p_name#X] + +(98) Exchange +Input [1]: [p_partkey#X] +Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(99) Scan parquet +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] +ReadSchema: struct + +(100) Filter +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) + +(101) Exchange +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(102) ShuffledHashJoin +Left keys [1]: [p_partkey#X] +Right keys [1]: [l_partkey#X] +Join type: Inner +Join condition: None + +(103) Project +Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] + +(104) Exchange +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(105) Scan parquet +Output [2]: [s_suppkey#X, s_nationkey#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] +ReadSchema: struct + +(106) Filter +Input [2]: [s_suppkey#X, s_nationkey#X] +Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) + +(107) Exchange +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(108) ShuffledHashJoin +Left keys [1]: [l_suppkey#X] +Right keys [1]: [s_suppkey#X] +Join type: Inner +Join condition: None + +(109) Project +Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] + +(110) Exchange +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(111) Scan parquet +Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] +ReadSchema: struct + +(112) Filter +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) + +(113) Exchange +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(114) ShuffledHashJoin +Left keys [2]: [l_suppkey#X, l_partkey#X] +Right keys [2]: [ps_suppkey#X, ps_partkey#X] +Join type: Inner +Join condition: None + +(115) Project +Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] + +(116) Exchange +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(117) Scan parquet +Output [2]: [o_orderkey#X, o_orderdate#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(o_orderkey)] +ReadSchema: struct + +(118) Filter +Input [2]: [o_orderkey#X, o_orderdate#X] +Condition : isnotnull(o_orderkey#X) + +(119) Exchange +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(120) ShuffledHashJoin +Left keys [1]: [l_orderkey#X] +Right keys [1]: [o_orderkey#X] +Join type: Inner +Join condition: None + +(121) Project +Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] + +(122) Exchange +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(123) Scan parquet +Output [2]: [n_nationkey#X, n_name#X] +Batched: true +Location: InMemoryFileIndex [*] +PushedFilters: [IsNotNull(n_nationkey)] +ReadSchema: struct + +(124) Filter +Input [2]: [n_nationkey#X, n_name#X] +Condition : isnotnull(n_nationkey#X) + +(125) Exchange +Input [2]: [n_nationkey#X, n_name#X] +Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(126) ShuffledHashJoin +Left keys [1]: [s_nationkey#X] +Right keys [1]: [n_nationkey#X] +Join type: Inner +Join condition: None + +(127) Project +Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] + +(128) HashAggregate +Input [3]: [nation#X, o_year#X, amount#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [partial_sum(amount#X)] +Aggregate Attributes [2]: [sum#X, isEmpty#X] +Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] + +(129) Exchange +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(130) HashAggregate +Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] +Keys [2]: [nation#X, o_year#X] +Functions [1]: [sum(amount#X)] +Aggregate Attributes [1]: [sum(amount#X)#X] +Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] + +(131) Exchange +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] + +(132) Sort +Input [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 + +(133) AdaptiveSparkPlan +Output [3]: [nation#X, o_year#X, sum_profit#X] +Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala index 587492ef346a..33f5e48adea5 100644 --- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala @@ -449,4 +449,10 @@ class VeloxFunctionsValidateSuite extends VeloxWholeStageTransformerSuite { checkOperatorMatch[ProjectExecTransformer] } } + + test("Test unhex function") { + runQueryAndCompare("SELECT unhex(hex(l_shipmode)) FROM lineitem limit 1") { + checkOperatorMatch[ProjectExecTransformer] + } + } } diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala index 8b3fb326e1ac..8bcc16fac408 100644 --- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala +++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxTPCHSuite.scala @@ -87,7 +87,7 @@ abstract class VeloxTPCHSuite extends VeloxTPCHTableSupport { formatSparkVersion match { case "32" => true case "33" => true - case "34" => false + case "34" => true case _ => false } ) diff --git a/cpp/velox/memory/VeloxMemoryManager.cc b/cpp/velox/memory/VeloxMemoryManager.cc index 3f6682e2cf26..0d22a9d0891f 100644 --- a/cpp/velox/memory/VeloxMemoryManager.cc +++ b/cpp/velox/memory/VeloxMemoryManager.cc @@ -63,8 +63,11 @@ class ListenableArbitrator : public velox::memory::MemoryArbitrator { return true; } - uint64_t shrinkCapacity(const std::vector>& pools, uint64_t targetBytes) - override { + uint64_t shrinkCapacity( + const std::vector>& pools, + uint64_t targetBytes, + bool allowSpill = true, + bool allowAbort = false) override { facebook::velox::exec::MemoryReclaimer::Stats status; GLUTEN_CHECK(pools.size() == 1, "Should shrink a single pool at a time"); std::lock_guard l(mutex_); // FIXME: Do we have recursive locking for this mutex? diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index e6b3a171631d..2845cfa6414d 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_03_12 +VELOX_BRANCH=2024_03_13 VELOX_HOME="" #Set on run gluten on HDFS diff --git a/gluten-core/src/main/scala/io/glutenproject/execution/BatchScanExecTransformer.scala b/gluten-core/src/main/scala/io/glutenproject/execution/BatchScanExecTransformer.scala index 5e1bd128942f..dfb448f13242 100644 --- a/gluten-core/src/main/scala/io/glutenproject/execution/BatchScanExecTransformer.scala +++ b/gluten-core/src/main/scala/io/glutenproject/execution/BatchScanExecTransformer.scala @@ -32,13 +32,7 @@ import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch -/** - * Columnar Based BatchScanExec. Although keyGroupedPartitioning is not used, it cannot be deleted, - * it can make BatchScanExecTransformer contain a constructor with the same parameters as - * Spark-3.3's BatchScanExec. Otherwise, the corresponding constructor will not be found when - * calling TreeNode.makeCopy and will fail to copy this node during transformation. - */ - +/** Columnar Based BatchScanExec. */ case class BatchScanExecTransformer( override val output: Seq[AttributeReference], @transient override val scan: Scan, @@ -80,7 +74,16 @@ abstract class BatchScanExecTransformerBase( override val commonPartitionValues: Option[Seq[(InternalRow, Int)]] = None, override val applyPartialClustering: Boolean = false, override val replicatePartitions: Boolean = false) - extends BatchScanExecShim(output, scan, runtimeFilters, table = table) + extends BatchScanExecShim( + output, + scan, + runtimeFilters, + keyGroupedPartitioning, + ordering, + table, + commonPartitionValues, + applyPartialClustering, + replicatePartitions) with BasicScanExecTransformer { // Note: "metrics" is made transient to avoid sending driver-side metrics to tasks. diff --git a/gluten-iceberg/src/main/scala/io/glutenproject/execution/IcebergScanTransformer.scala b/gluten-iceberg/src/main/scala/io/glutenproject/execution/IcebergScanTransformer.scala index 1c2c189edc12..fdb82f23e6c8 100644 --- a/gluten-iceberg/src/main/scala/io/glutenproject/execution/IcebergScanTransformer.scala +++ b/gluten-iceberg/src/main/scala/io/glutenproject/execution/IcebergScanTransformer.scala @@ -20,6 +20,7 @@ import io.glutenproject.sql.shims.SparkShimLoader import io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat import io.glutenproject.substrait.rel.SplitInfo +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{AttributeReference, DynamicPruningExpression, Expression, Literal} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.connector.catalog.Table @@ -33,12 +34,17 @@ case class IcebergScanTransformer( override val output: Seq[AttributeReference], @transient override val scan: Scan, override val runtimeFilters: Seq[Expression], - @transient override val table: Table) + @transient override val table: Table, + override val keyGroupedPartitioning: Option[Seq[Expression]] = None, + override val commonPartitionValues: Option[Seq[(InternalRow, Int)]] = None) extends BatchScanExecTransformerBase( output = output, scan = scan, runtimeFilters = runtimeFilters, - table = table) { + table = table, + keyGroupedPartitioning = keyGroupedPartitioning, + commonPartitionValues = commonPartitionValues + ) { override def filterExprs(): Seq[Expression] = pushdownFilters.getOrElse(Seq.empty) @@ -51,7 +57,12 @@ case class IcebergScanTransformer( override lazy val fileFormat: ReadFileFormat = GlutenIcebergSourceUtil.getFileFormat(scan) override def getSplitInfos: Seq[SplitInfo] = { - getPartitions.zipWithIndex.map { + val groupedPartitions = SparkShimLoader.getSparkShims.orderPartitions( + scan, + keyGroupedPartitioning, + filteredPartitions, + outputPartitioning) + groupedPartitions.zipWithIndex.map { case (p, index) => GlutenIcebergSourceUtil.genSplitInfo(p, index) } } @@ -64,6 +75,8 @@ case class IcebergScanTransformer( output) ) } + // Needed for tests + private[execution] def getKeyGroupPartitioning: Option[Seq[Expression]] = keyGroupedPartitioning } object IcebergScanTransformer { @@ -74,6 +87,9 @@ object IcebergScanTransformer { batchScan.output, batchScan.scan, newPartitionFilters, - table = SparkShimLoader.getSparkShims.getBatchScanExecTable(batchScan)) + table = SparkShimLoader.getSparkShims.getBatchScanExecTable(batchScan), + keyGroupedPartitioning = SparkShimLoader.getSparkShims.getKeyGroupedPartitioning(batchScan), + commonPartitionValues = SparkShimLoader.getSparkShims.getCommonPartitionValues(batchScan) + ) } } diff --git a/gluten-iceberg/src/test/scala/io/glutenproject/execution/VeloxIcebergSuite.scala b/gluten-iceberg/src/test/scala/io/glutenproject/execution/VeloxIcebergSuite.scala index fcb95ace9255..019c602957fe 100644 --- a/gluten-iceberg/src/test/scala/io/glutenproject/execution/VeloxIcebergSuite.scala +++ b/gluten-iceberg/src/test/scala/io/glutenproject/execution/VeloxIcebergSuite.scala @@ -43,6 +43,21 @@ class VeloxIcebergSuite extends WholeStageTransformerSuite { .set("spark.sql.catalog.spark_catalog.warehouse", s"file://$rootPath/tpch-data-iceberg-velox") } + private def isSparkVersionAtleast(version: String): Boolean = { + val currentVersion = spark.version + val currentVersionSplit = currentVersion.split("\\.") + val versionSplit = version.split("\\.") + currentVersionSplit.zip(versionSplit).foreach { + case (current, required) => + if (current.toInt > required.toInt) { + return true + } else if (current.toInt < required.toInt) { + return false + } + } + true + } + test("iceberg transformer exists") { spark.sql(""" |create table iceberg_tb using iceberg as @@ -56,44 +71,246 @@ class VeloxIcebergSuite extends WholeStageTransformerSuite { } } - test("iceberg partitioned table") { - withTable("p_str_tb", "p_int_tb") { + test("iceberg bucketed join") { + assume(isSparkVersionAtleast("3.4")) + val leftTable = "p_str_tb" + val rightTable = "p_int_tb" + withTable(leftTable, rightTable) { // Partition key of string type. withSQLConf(GlutenConfig.GLUTEN_ENABLE_KEY -> "false") { // Gluten does not support write iceberg table. + spark.sql(s""" + |create table $leftTable(id int, name string, p string) + |using iceberg + |partitioned by (bucket(4, id)); + |""".stripMargin) spark.sql( - """ - |create table p_str_tb(id int, name string, p string) using iceberg partitioned by (p); - |""".stripMargin) + s""" + |insert into table $leftTable values + |(4, 'a5', 'p4'), + |(1, 'a1', 'p1'), + |(2, 'a3', 'p2'), + |(1, 'a2', 'p1'), + |(3, 'a4', 'p3'); + |""".stripMargin + ) + } + + // Partition key of integer type. + withSQLConf( + GlutenConfig.GLUTEN_ENABLE_KEY -> "false" + ) { + // Gluten does not support write iceberg table. + spark.sql(s""" + |create table $rightTable(id int, name string, p int) + |using iceberg + |partitioned by (bucket(4, id)); + |""".stripMargin) spark.sql( - """ - |insert into table p_str_tb values(1, 'a1', 'p1'), (2, 'a2', 'p1'), (3, 'a3', 'p2'); - |""".stripMargin + s""" + |insert into table $rightTable values + |(3, 'b4', 23), + |(1, 'b2', 21), + |(4, 'b5', 24), + |(2, 'b3', 22), + |(1, 'b1', 21); + |""".stripMargin ) } - runQueryAndCompare(""" - |select * from p_str_tb; - |""".stripMargin) { - checkOperatorMatch[IcebergScanTransformer] + + withSQLConf( + "spark.sql.sources.v2.bucketing.enabled" -> "true", + "spark.sql.requireAllClusterKeysForCoPartition" -> "false", + "spark.sql.adaptive.enabled" -> "false", + "spark.sql.iceberg.planning.preserve-data-grouping" -> "true", + "spark.sql.autoBroadcastJoinThreshold" -> "-1", + "spark.sql.sources.v2.bucketing.pushPartValues.enabled" -> "true" + ) { + runQueryAndCompare(s""" + |select s.id, s.name, i.name, i.p + | from $leftTable s inner join $rightTable i + | on s.id = i.id; + |""".stripMargin) { + df => + { + assert( + getExecutedPlan(df).count( + plan => { + plan.isInstanceOf[IcebergScanTransformer] + }) == 2) + getExecutedPlan(df).map { + case plan if plan.isInstanceOf[IcebergScanTransformer] => + assert( + plan.asInstanceOf[IcebergScanTransformer].getKeyGroupPartitioning.isDefined) + assert(plan.asInstanceOf[IcebergScanTransformer].getSplitInfos.length == 3) + case _ => // do nothing + } + checkLengthAndPlan(df, 7) + } + } + } + } + } + + test("iceberg bucketed join with partition") { + assume(isSparkVersionAtleast("3.4")) + val leftTable = "p_str_tb" + val rightTable = "p_int_tb" + withTable(leftTable, rightTable) { + // Partition key of string type. + withSQLConf(GlutenConfig.GLUTEN_ENABLE_KEY -> "false") { + // Gluten does not support write iceberg table. + spark.sql(s""" + |create table $leftTable(id int, name string, p int) + |using iceberg + |partitioned by (bucket(4, id), p); + |""".stripMargin) + spark.sql( + s""" + |insert into table $leftTable values + |(4, 'a5', 2), + |(1, 'a1', 1), + |(2, 'a3', 1), + |(1, 'a2', 1), + |(3, 'a4', 2); + |""".stripMargin + ) } // Partition key of integer type. + withSQLConf( + GlutenConfig.GLUTEN_ENABLE_KEY -> "false" + ) { + // Gluten does not support write iceberg table. + spark.sql(s""" + |create table $rightTable(id int, name string, p int) + |using iceberg + |partitioned by (bucket(4, id), p); + |""".stripMargin) + spark.sql( + s""" + |insert into table $rightTable values + |(3, 'b4', 2), + |(1, 'b2', 1), + |(4, 'b5', 2), + |(2, 'b3', 1), + |(1, 'b1', 1); + |""".stripMargin + ) + } + + withSQLConf( + "spark.sql.sources.v2.bucketing.enabled" -> "true", + "spark.sql.requireAllClusterKeysForCoPartition" -> "false", + "spark.sql.adaptive.enabled" -> "false", + "spark.sql.iceberg.planning.preserve-data-grouping" -> "true", + "spark.sql.autoBroadcastJoinThreshold" -> "-1", + "spark.sql.sources.v2.bucketing.pushPartValues.enabled" -> "true" + ) { + runQueryAndCompare(s""" + |select s.id, s.name, i.name, i.p + | from $leftTable s inner join $rightTable i + | on s.id = i.id and s.p = i.p; + |""".stripMargin) { + df => + { + assert( + getExecutedPlan(df).count( + plan => { + plan.isInstanceOf[IcebergScanTransformer] + }) == 2) + getExecutedPlan(df).map { + case plan if plan.isInstanceOf[IcebergScanTransformer] => + assert( + plan.asInstanceOf[IcebergScanTransformer].getKeyGroupPartitioning.isDefined) + assert(plan.asInstanceOf[IcebergScanTransformer].getSplitInfos.length == 3) + case _ => // do nothing + } + checkLengthAndPlan(df, 7) + } + } + } + } + } + + test("iceberg bucketed join with partition filter") { + assume(isSparkVersionAtleast("3.4")) + val leftTable = "p_str_tb" + val rightTable = "p_int_tb" + withTable(leftTable, rightTable) { + // Partition key of string type. withSQLConf(GlutenConfig.GLUTEN_ENABLE_KEY -> "false") { // Gluten does not support write iceberg table. + spark.sql(s""" + |create table $leftTable(id int, name string, p int) + |using iceberg + |partitioned by (bucket(4, id), p); + |""".stripMargin) spark.sql( - """ - |create table p_int_tb(id int, name string, p int) using iceberg partitioned by (p); - |""".stripMargin) + s""" + |insert into table $leftTable values + |(4, 'a5', 2), + |(1, 'a1', 1), + |(2, 'a3', 1), + |(1, 'a2', 1), + |(3, 'a4', 2); + |""".stripMargin + ) + } + + // Partition key of integer type. + withSQLConf( + GlutenConfig.GLUTEN_ENABLE_KEY -> "false" + ) { + // Gluten does not support write iceberg table. + spark.sql(s""" + |create table $rightTable(id int, name string, p int) + |using iceberg + |partitioned by (bucket(4, id), p); + |""".stripMargin) spark.sql( - """ - |insert into table p_int_tb values(1, 'a1', 1), (2, 'a2', 1), (3, 'a3', 2); - |""".stripMargin + s""" + |insert into table $rightTable values + |(3, 'b4', 2), + |(1, 'b2', 1), + |(4, 'b5', 2), + |(2, 'b3', 1), + |(1, 'b1', 1); + |""".stripMargin ) } - runQueryAndCompare(""" - |select * from p_int_tb; - |""".stripMargin) { - checkOperatorMatch[IcebergScanTransformer] + + withSQLConf( + "spark.sql.sources.v2.bucketing.enabled" -> "true", + "spark.sql.requireAllClusterKeysForCoPartition" -> "false", + "spark.sql.adaptive.enabled" -> "false", + "spark.sql.iceberg.planning.preserve-data-grouping" -> "true", + "spark.sql.autoBroadcastJoinThreshold" -> "-1", + "spark.sql.sources.v2.bucketing.pushPartValues.enabled" -> "true" + ) { + runQueryAndCompare(s""" + |select s.id, s.name, i.name, i.p + | from $leftTable s inner join $rightTable i + | on s.id = i.id + | where s.p = 1 and i.p = 1; + |""".stripMargin) { + df => + { + assert( + getExecutedPlan(df).count( + plan => { + plan.isInstanceOf[IcebergScanTransformer] + }) == 2) + getExecutedPlan(df).map { + case plan if plan.isInstanceOf[IcebergScanTransformer] => + assert( + plan.asInstanceOf[IcebergScanTransformer].getKeyGroupPartitioning.isDefined) + assert(plan.asInstanceOf[IcebergScanTransformer].getSplitInfos.length == 1) + case _ => // do nothing + } + checkLengthAndPlan(df, 5) + } + } } } } diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala index fd54a4c9cd9f..81c664526085 100644 --- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala @@ -52,14 +52,14 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { "datetime-parsing-legacy.sql", "datetime-parsing.sql", "datetime-special.sql", - // "decimalArithmeticOperations.sql", + "decimalArithmeticOperations.sql", // "describe-part-after-analyze.sql", "describe-query.sql", "describe-table-after-alter-table.sql", "describe-table-column.sql", "describe.sql", "except-all.sql", - // "except.sql", + "except.sql", "extract.sql", "group-by-filter.sql", "group-by-ordinal.sql", @@ -168,25 +168,25 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { "postgreSQL/window_part4.sql", "postgreSQL/with.sql", "datetime-special.sql", - // "timestamp-ansi.sql", + "timestamp-ansi.sql", "timestamp.sql", "arrayJoin.sql", "binaryComparison.sql", - // "booleanEquality.sql", - // "caseWhenCoercion.sql", + "booleanEquality.sql", + "caseWhenCoercion.sql", "concat.sql", - // "dateTimeOperations.sql", - // "decimalPrecision.sql", - // "division.sql", + "dateTimeOperations.sql", + "decimalPrecision.sql", + "division.sql", "elt.sql", - // "ifCoercion.sql", + "ifCoercion.sql", "implicitTypeCasts.sql", - // "inConversion.sql", - // "mapZipWith.sql", - // "promoteStrings.sql", - // "stringCastAndExpressions.sql", - // "widenSetOperationTypes.sql", - // "windowFrameCoercion.sql", + "inConversion.sql", + "mapZipWith.sql", + "promoteStrings.sql", + "stringCastAndExpressions.sql", + "widenSetOperationTypes.sql", + "windowFrameCoercion.sql", "timestamp-ltz.sql", "timestamp-ntz.sql", "timezone.sql", @@ -194,7 +194,7 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { "try_arithmetic.sql", "try_cast.sql", "udaf.sql", - // "union.sql", + "union.sql", "using-join.sql", "window.sql", "udf-union.sql", diff --git a/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala b/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala index a2c4599b67f3..64ed1b866c0c 100644 --- a/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala +++ b/shims/common/src/main/scala/io/glutenproject/sql/shims/SparkShims.scala @@ -21,16 +21,18 @@ import io.glutenproject.expression.Sig import org.apache.spark.{SparkContext, TaskContext} import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.scheduler.TaskInfo -import org.apache.spark.shuffle.{ShuffleHandle, ShuffleReader} +import org.apache.spark.shuffle.ShuffleHandle import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.physical.Distribution +import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.read.{InputPartition, Scan} import org.apache.spark.sql.execution.{FileSourceScanExec, GlobalLimitExec, SparkPlan, TakeOrderedAndProjectExec} import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, PartitionDirectory, PartitionedFile, PartitioningAwareFileIndex, WriteJobDescription, WriteTaskResult} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec @@ -165,4 +167,14 @@ trait SparkShims { // For compatibility with Spark-3.5. def getAnalysisExceptionPlan(ae: AnalysisException): Option[LogicalPlan] + + def getKeyGroupedPartitioning(batchScan: BatchScanExec): Option[Seq[Expression]] + + def getCommonPartitionValues(batchScan: BatchScanExec): Option[Seq[(InternalRow, Int)]] + + def orderPartitions( + scan: Scan, + keyGroupedPartitioning: Option[Seq[Expression]], + filteredPartitions: Seq[Seq[InputPartition]], + outputPartitioning: Partitioning): Seq[InputPartition] = filteredPartitions.flatten } diff --git a/shims/spark32/src/main/scala/io/glutenproject/sql/shims/spark32/Spark32Shims.scala b/shims/spark32/src/main/scala/io/glutenproject/sql/shims/spark32/Spark32Shims.scala index 8f2d9025fa42..8f028968e18a 100644 --- a/shims/spark32/src/main/scala/io/glutenproject/sql/shims/spark32/Spark32Shims.scala +++ b/shims/spark32/src/main/scala/io/glutenproject/sql/shims/spark32/Spark32Shims.scala @@ -196,4 +196,8 @@ class Spark32Shims extends SparkShims { ae.plan } + override def getKeyGroupedPartitioning(batchScan: BatchScanExec): Option[Seq[Expression]] = null + + override def getCommonPartitionValues(batchScan: BatchScanExec): Option[Seq[(InternalRow, Int)]] = + null } diff --git a/shims/spark33/src/main/scala/io/glutenproject/sql/shims/spark33/Spark33Shims.scala b/shims/spark33/src/main/scala/io/glutenproject/sql/shims/spark33/Spark33Shims.scala index 8dcdf4f2a41d..8e770325f9b3 100644 --- a/shims/spark33/src/main/scala/io/glutenproject/sql/shims/spark33/Spark33Shims.scala +++ b/shims/spark33/src/main/scala/io/glutenproject/sql/shims/spark33/Spark33Shims.scala @@ -256,4 +256,10 @@ class Spark33Shims extends SparkShims { def getAnalysisExceptionPlan(ae: AnalysisException): Option[LogicalPlan] = { ae.plan } + + override def getKeyGroupedPartitioning(batchScan: BatchScanExec): Option[Seq[Expression]] = { + batchScan.keyGroupedPartitioning + } + override def getCommonPartitionValues(batchScan: BatchScanExec): Option[Seq[(InternalRow, Int)]] = + null } diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala index cdd571fd0b5f..dcfb5c9501f3 100644 --- a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala +++ b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala @@ -35,6 +35,7 @@ abstract class BatchScanExecShim( override val output: Seq[AttributeReference], @transient override val scan: Scan, override val runtimeFilters: Seq[Expression], + val keyGroupedPartitioning: Option[Seq[Expression]] = None, val ordering: Option[Seq[SortOrder]] = None, @transient val table: Table, val commonPartitionValues: Option[Seq[(InternalRow, Int)]] = None, diff --git a/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala index 6942d0ed12ff..c98def5daeff 100644 --- a/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/io/glutenproject/sql/shims/spark34/Spark34Shims.scala @@ -31,11 +31,13 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.BloomFilterAggregate import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution} +import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution, KeyGroupedPartitioning, Partitioning} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.InternalRowComparableWrapper import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition, Scan} import org.apache.spark.sql.execution.{FileSourceScanExec, GlobalLimitExec, GlutenFileFormatWriter, PartitionedFileUtil, SparkPlan, TakeOrderedAndProjectExec} import org.apache.spark.sql.execution.datasources.{BucketingUtils, FileFormat, FilePartition, FileScanRDD, PartitionDirectory, PartitionedFile, PartitioningAwareFileIndex, WriteJobDescription, WriteTaskResult} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec @@ -301,4 +303,45 @@ class Spark34Shims extends SparkShims { def getAnalysisExceptionPlan(ae: AnalysisException): Option[LogicalPlan] = { ae.plan } + + override def getKeyGroupedPartitioning(batchScan: BatchScanExec): Option[Seq[Expression]] = { + batchScan.keyGroupedPartitioning + } + + override def getCommonPartitionValues( + batchScan: BatchScanExec): Option[Seq[(InternalRow, Int)]] = { + batchScan.commonPartitionValues + } + + override def orderPartitions( + scan: Scan, + keyGroupedPartitioning: Option[Seq[Expression]], + filteredPartitions: Seq[Seq[InputPartition]], + outputPartitioning: Partitioning): Seq[InputPartition] = { + scan match { + case _ if keyGroupedPartitioning.isDefined => + var newPartitions = filteredPartitions + outputPartitioning match { + case p: KeyGroupedPartitioning => + val partitionMapping = newPartitions + .map( + s => + InternalRowComparableWrapper( + s.head.asInstanceOf[HasPartitionKey], + p.expressions) -> s) + .toMap + newPartitions = p.partitionValues.map { + partValue => + // Use empty partition for those partition values that are not present + partitionMapping.getOrElse( + InternalRowComparableWrapper(partValue, p.expressions), + Seq.empty) + } + case _ => + } + newPartitions.flatten + case _ => + filteredPartitions.flatten + } + } } diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala index 3cc09068e3d0..4c12356d6378 100644 --- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala +++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.SparkException import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.KeyGroupedPartitioning import org.apache.spark.sql.catalyst.util.InternalRowComparableWrapper @@ -34,8 +35,22 @@ abstract class BatchScanExecShim( output: Seq[AttributeReference], @transient scan: Scan, runtimeFilters: Seq[Expression], - @transient val table: Table) - extends AbstractBatchScanExec(output, scan, runtimeFilters, table = table) { + keyGroupedPartitioning: Option[Seq[Expression]] = None, + ordering: Option[Seq[SortOrder]] = None, + @transient val table: Table, + commonPartitionValues: Option[Seq[(InternalRow, Int)]] = None, + applyPartialClustering: Boolean = false, + replicatePartitions: Boolean = false) + extends AbstractBatchScanExec( + output, + scan, + runtimeFilters, + keyGroupedPartitioning, + ordering, + table, + commonPartitionValues, + applyPartialClustering, + replicatePartitions) { // Note: "metrics" is made transient to avoid sending driver-side metrics to tasks. @transient override lazy val metrics: Map[String, SQLMetric] = Map() diff --git a/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala index 5498a21e1147..6a6f3b2c8fd1 100644 --- a/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/io/glutenproject/sql/shims/spark35/Spark35Shims.scala @@ -304,4 +304,9 @@ class Spark35Shims extends SparkShims { None } } + + override def getKeyGroupedPartitioning(batchScan: BatchScanExec): Option[Seq[Expression]] = null + + override def getCommonPartitionValues(batchScan: BatchScanExec): Option[Seq[(InternalRow, Int)]] = + null } diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala index 768957cf2b4e..ec12fd33a755 100644 --- a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala +++ b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala @@ -35,6 +35,8 @@ abstract class BatchScanExecShim( output: Seq[AttributeReference], @transient scan: Scan, runtimeFilters: Seq[Expression], + keyGroupedPartitioning: Option[Seq[Expression]] = None, + ordering: Option[Seq[SortOrder]] = None, @transient val table: Table, val commonPartitionValues: Option[Seq[(InternalRow, Int)]] = None, val applyPartialClustering: Boolean = false,