Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/gaoyangxiaozhu/gluten into …
Browse files Browse the repository at this point in the history
…gayangya/metadatacolumns
  • Loading branch information
gaoyangxiaozhu committed Mar 14, 2024
2 parents a3b0289 + 825cace commit 22821e9
Show file tree
Hide file tree
Showing 59 changed files with 17,294 additions and 62 deletions.
20 changes: 16 additions & 4 deletions .github/workflows/velox_be.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,8 @@ jobs:
path: |
/tmp/${{ github.run_id }}/spark32/tpch-approved-plan/**
- name: Clean temp golden files
if: failure()
run: |
rm -rf /tmp/$GITHUB_RUN_ID/spark32/tpch-approved-plan
rm -rf /tmp/$GITHUB_RUN_ID/spark32
- name: Exit docker container
if: ${{ always() }}
run: |
Expand Down Expand Up @@ -236,9 +235,8 @@ jobs:
path: |
/tmp/${{ github.run_id }}/spark33/tpch-approved-plan/**
- name: Clean temp golden files
if: failure()
run: |
rm -rf /tmp/$GITHUB_RUN_ID/spark33/tpch-approved-plan
rm -rf /tmp/$GITHUB_RUN_ID/spark33
- name: Exit docker container
if: ${{ always() }}
run: |
Expand Down Expand Up @@ -308,6 +306,20 @@ jobs:
$PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \
mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \
mvn test -Pspark-3.4 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest'
- name: Copy golden files from container to host
if: failure()
run: |
$PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/cp.sh /tmp/tpch-approved-plan/ /tmp/$GITHUB_RUN_ID/spark34/tpch-approved-plan
- name: Upload golden files
if: failure()
uses: actions/upload-artifact@v4
with:
name: golden-files-spark34
path: |
/tmp/${{ github.run_id }}/spark34/tpch-approved-plan/**
- name: Clean temp golden files
run: |
rm -rf /tmp/$GITHUB_RUN_ID/spark34
- name: Exit docker container
if: ${{ always() }}
run: |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
== Physical Plan ==
AdaptiveSparkPlan (28)
+- == Final Plan ==
VeloxColumnarToRowExec (19)
+- ^ SortExecTransformer (17)
+- ^ InputIteratorTransformer (16)
+- ^ InputAdapter (15)
+- ^ ShuffleQueryStage (14), Statistics(X)
+- ColumnarExchange (13)
+- ^ RegularHashAggregateExecTransformer (11)
+- ^ InputIteratorTransformer (10)
+- ^ InputAdapter (9)
+- ^ ShuffleQueryStage (8), Statistics(X)
+- ColumnarExchange (7)
+- ^ ProjectExecTransformer (5)
+- ^ FlushableHashAggregateExecTransformer (4)
+- ^ ProjectExecTransformer (3)
+- ^ FilterExecTransformer (2)
+- ^ Scan parquet (1)
+- == Initial Plan ==
Sort (27)
+- Exchange (26)
+- HashAggregate (25)
+- Exchange (24)
+- HashAggregate (23)
+- Project (22)
+- Filter (21)
+- Scan parquet (20)


(1) Scan parquet
Output [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X]
Batched: true
Location: InMemoryFileIndex [*]
PushedFilters: [IsNotNull(l_shipdate), LessThanOrEqual(l_shipdate,1998-09-02)]
ReadSchema: struct<l_quantity:decimal(12,2),l_extendedprice:decimal(12,2),l_discount:decimal(12,2),l_tax:decimal(12,2),l_returnflag:string,l_linestatus:string,l_shipdate:date>

(2) FilterExecTransformer
Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X]
Arguments: (isnotnull(l_shipdate#X) AND (l_shipdate#X <= 1998-09-02))

(3) ProjectExecTransformer
Output [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X, ((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)) AS _pre_X#X]
Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X]

(4) FlushableHashAggregateExecTransformer
Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, _pre_X#X, _pre_X#X]
Keys [2]: [l_returnflag#X, l_linestatus#X]
Functions [8]: [partial_sum(l_quantity#X), partial_sum(l_extendedprice#X), partial_sum(_pre_X#X), partial_sum(_pre_X#X), partial_avg(l_quantity#X), partial_avg(l_extendedprice#X), partial_avg(l_discount#X), partial_count(1)]
Aggregate Attributes [15]: [sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Results [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]

(5) ProjectExecTransformer
Output [18]: [hash(l_returnflag#X, l_linestatus#X, 42) AS hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]

(6) WholeStageCodegenTransformer (X)
Input [18]: [hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Arguments: false

(7) ColumnarExchange
Input [18]: [hash_partition_key#X, l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Arguments: hashpartitioning(l_returnflag#X, l_linestatus#X, 1), ENSURE_REQUIREMENTS, [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X], [plan_id=X], [id=#X]

(8) ShuffleQueryStage
Output [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Arguments: X

(9) InputAdapter
Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]

(10) InputIteratorTransformer
Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]

(11) RegularHashAggregateExecTransformer
Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Keys [2]: [l_returnflag#X, l_linestatus#X]
Functions [8]: [sum(l_quantity#X), sum(l_extendedprice#X), sum((l_extendedprice#X * (1 - l_discount#X))), sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), avg(l_quantity#X), avg(l_extendedprice#X), avg(l_discount#X), count(1)]
Aggregate Attributes [8]: [sum(l_quantity#X)#X, sum(l_extendedprice#X)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X, avg(l_quantity#X)#X, avg(l_extendedprice#X)#X, avg(l_discount#X)#X, count(1)#X]
Results [10]: [l_returnflag#X, l_linestatus#X, sum(l_quantity#X)#X AS sum_qty#X, sum(l_extendedprice#X)#X AS sum_base_price#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS sum_disc_price#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X AS sum_charge#X, avg(l_quantity#X)#X AS avg_qty#X, avg(l_extendedprice#X)#X AS avg_price#X, avg(l_discount#X)#X AS avg_disc#X, count(1)#X AS count_order#X]

(12) WholeStageCodegenTransformer (X)
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: false

(13) ColumnarExchange
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: rangepartitioning(l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [id=#X]

(14) ShuffleQueryStage
Output [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: X

(15) InputAdapter
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]

(16) InputIteratorTransformer
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]

(17) SortExecTransformer
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: [l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST], true, 0

(18) WholeStageCodegenTransformer (X)
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: false

(19) VeloxColumnarToRowExec
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]

(20) Scan parquet
Output [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X]
Batched: true
Location: InMemoryFileIndex [*]
PushedFilters: [IsNotNull(l_shipdate), LessThanOrEqual(l_shipdate,1998-09-02)]
ReadSchema: struct<l_quantity:decimal(12,2),l_extendedprice:decimal(12,2),l_discount:decimal(12,2),l_tax:decimal(12,2),l_returnflag:string,l_linestatus:string,l_shipdate:date>

(21) Filter
Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X]
Condition : (isnotnull(l_shipdate#X) AND (l_shipdate#X <= 1998-09-02))

(22) Project
Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X]
Input [7]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X, l_shipdate#X]

(23) HashAggregate
Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, l_tax#X, l_returnflag#X, l_linestatus#X]
Keys [2]: [l_returnflag#X, l_linestatus#X]
Functions [8]: [partial_sum(l_quantity#X), partial_sum(l_extendedprice#X), partial_sum((l_extendedprice#X * (1 - l_discount#X))), partial_sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), partial_avg(l_quantity#X), partial_avg(l_extendedprice#X), partial_avg(l_discount#X), partial_count(1)]
Aggregate Attributes [15]: [sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Results [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]

(24) Exchange
Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Arguments: hashpartitioning(l_returnflag#X, l_linestatus#X, 1), ENSURE_REQUIREMENTS, [plan_id=X]

(25) HashAggregate
Input [17]: [l_returnflag#X, l_linestatus#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, isEmpty#X, sum#X, count#X, sum#X, count#X, sum#X, count#X, count#X]
Keys [2]: [l_returnflag#X, l_linestatus#X]
Functions [8]: [sum(l_quantity#X), sum(l_extendedprice#X), sum((l_extendedprice#X * (1 - l_discount#X))), sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X))), avg(l_quantity#X), avg(l_extendedprice#X), avg(l_discount#X), count(1)]
Aggregate Attributes [8]: [sum(l_quantity#X)#X, sum(l_extendedprice#X)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X, avg(l_quantity#X)#X, avg(l_extendedprice#X)#X, avg(l_discount#X)#X, count(1)#X]
Results [10]: [l_returnflag#X, l_linestatus#X, sum(l_quantity#X)#X AS sum_qty#X, sum(l_extendedprice#X)#X AS sum_base_price#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS sum_disc_price#X, sum(((l_extendedprice#X * (1 - l_discount#X)) * (1 + l_tax#X)))#X AS sum_charge#X, avg(l_quantity#X)#X AS avg_qty#X, avg(l_extendedprice#X)#X AS avg_price#X, avg(l_discount#X)#X AS avg_disc#X, count(1)#X AS count_order#X]

(26) Exchange
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: rangepartitioning(l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X]

(27) Sort
Input [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: [l_returnflag#X ASC NULLS FIRST, l_linestatus#X ASC NULLS FIRST], true, 0

(28) AdaptiveSparkPlan
Output [10]: [l_returnflag#X, l_linestatus#X, sum_qty#X, sum_base_price#X, sum_disc_price#X, sum_charge#X, avg_qty#X, avg_price#X, avg_disc#X, count_order#X]
Arguments: isFinalPlan=true
Loading

0 comments on commit 22821e9

Please sign in to comment.