From 147054d9843c4ed803d36931fa71ef7b8c455d2e Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Mon, 8 Jan 2024 04:50:17 -0700 Subject: [PATCH] opt: add rule to merge GroupBy and Window This commit adds a new norm rule, `FoldGroupByAndWindow`, which can merge a Window operator with a parent GroupBy operator when the grouping columns are the same as the partition columns. See the rule comment for the complete list of conditions. In addition to removing a potentially expensive Window operator, this transformation makes way for other rules to match. Fixes #113292 Release note: None --- .../testdata/benchmark_expectations | 2 +- .../logictest/testdata/logic_test/pg_catalog | 1 + pkg/sql/opt/exec/execbuilder/testdata/explain | 65 +-- pkg/sql/opt/norm/groupby_funcs.go | 76 +++ pkg/sql/opt/norm/rules/groupby.opt | 80 +++ pkg/sql/opt/norm/testdata/rules/groupby | 467 ++++++++++++++++++ pkg/sql/opt/norm/testdata/rules/prune_cols | 74 ++- pkg/sql/opt/norm/window_funcs.go | 21 + 8 files changed, 713 insertions(+), 73 deletions(-) diff --git a/pkg/bench/rttanalysis/testdata/benchmark_expectations b/pkg/bench/rttanalysis/testdata/benchmark_expectations index 5cccd470a0ea..d2b5b1b8d619 100644 --- a/pkg/bench/rttanalysis/testdata/benchmark_expectations +++ b/pkg/bench/rttanalysis/testdata/benchmark_expectations @@ -74,7 +74,7 @@ exp,benchmark 3,Jobs/show_job 3-5,Jobs/show_jobs 3,ORMQueries/activerecord_type_introspection_query -0,ORMQueries/asyncpg_types +4,ORMQueries/asyncpg_types 6,ORMQueries/column_descriptions_json_agg 4,ORMQueries/django_column_introspection_1_table 4,ORMQueries/django_column_introspection_4_tables diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 208ceee38332..73a846d18e1d 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -4490,6 +4490,7 @@ FROM ( WHERE c.relname = 'indexes_table' ) s2 GROUP BY indexname, indisunique, 
indisprimary, amname, exprdef, attoptions +ORDER BY indexname ---- indexname array_agg indisunique indisprimary array_agg amname exprdef attoptions indexes_include_idx {a,c,d} false false {ASC,ASC,ASC} prefix NULL NULL diff --git a/pkg/sql/opt/exec/execbuilder/testdata/explain b/pkg/sql/opt/exec/execbuilder/testdata/explain index 3269b51d61b6..fbc30e575fe1 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/explain +++ b/pkg/sql/opt/exec/execbuilder/testdata/explain @@ -560,7 +560,8 @@ vectorized: true └── • group (hash) │ group by: column_name, ordinal_position, column_default, is_nullable, generation_expression, is_hidden, crdb_sql_type │ - └── • window + └── • sort + │ order: +index_name │ └── • hash join (left outer) │ equality: (column_name) = (column_name) @@ -734,39 +735,41 @@ vectorized: true │ estimated row count: 3 │ order: +"role" │ - └── • hash join (left outer) + └── • merge join (right outer) │ estimated row count: 3 - │ equality: (username) = (member) - │ left cols are key + │ equality: (member) = (username) + │ right cols are key │ - ├── • group (hash) - │ │ estimated row count: 3 - │ │ group by: username - │ │ - │ └── • window - │ │ estimated row count: 3 - │ │ - │ └── • render - │ │ - │ └── • hash join (left outer) - │ │ estimated row count: 3 - │ │ equality: (username) = (username) - │ │ left cols are key - │ │ - │ ├── • scan - │ │ estimated row count: 3 (100% of the table; stats collected ago) - │ │ table: users@users_user_id_idx - │ │ spans: FULL SCAN - │ │ - │ └── • scan - │ estimated row count: 1 (100% of the table; stats collected ago) - │ table: role_options@primary - │ spans: FULL SCAN + ├── • scan + │ estimated row count: 1 (100% of the table; stats collected ago) + │ table: role_members@role_members_member_idx + │ spans: FULL SCAN │ - └── • scan - estimated row count: 1 (100% of the table; stats collected ago) - table: role_members@role_members_role_idx - spans: FULL SCAN + └── • group (streaming) + │ estimated row count: 3 + │ group by: 
username + │ ordered: +username + │ + └── • sort + │ estimated row count: 3 + │ order: +username,+option + │ + └── • render + │ + └── • hash join (left outer) + │ estimated row count: 3 + │ equality: (username) = (username) + │ left cols are key + │ + ├── • scan + │ estimated row count: 3 (100% of the table; stats collected ago) + │ table: users@users_user_id_idx + │ spans: FULL SCAN + │ + └── • scan + estimated row count: 1 (100% of the table; stats collected ago) + table: role_options@primary + spans: FULL SCAN # EXPLAIN selecting from a sequence. statement ok diff --git a/pkg/sql/opt/norm/groupby_funcs.go b/pkg/sql/opt/norm/groupby_funcs.go index 78a601d0d6d9..99e0568c2095 100644 --- a/pkg/sql/opt/norm/groupby_funcs.go +++ b/pkg/sql/opt/norm/groupby_funcs.go @@ -382,6 +382,82 @@ func (c *CustomFuncs) MergeAggs( return newAggs } +// CanMergeAggsAndWindow returns true if all the given aggregations satisfy one +// of the following conditions: +// 1. Reference only columns from the input of the Window operator. +// 2. Is a ConstAgg (or similar) that references an input aggregate window +// function. +// +// CanMergeAggsAndWindow expects that all the window functions have been +// verified to be aggregate functions. +func (c *CustomFuncs) CanMergeAggsAndWindow( + aggs memo.AggregationsExpr, windows memo.WindowsExpr, inputCols opt.ColSet, +) bool { + // Collect the columns produced by the window functions. + var windowCols opt.ColSet + for i := range windows { + windowCols.Add(windows[i].Col) + } + for i := range aggs { + if memo.ExtractAggInputColumns(aggs[i].Agg).SubsetOf(inputCols) { + // Condition 1: the aggregate function only references columns from the + // input of the Window operator. It will not be affected by a merge. + // In this case, it doesn't matter what the aggregate is, since it won't + // be modified in any way. + // + // Note that unlike for CanMergeAggs, it is not necessary to check for + // duplicate sensitivity. 
This is because window operators do not group or + // duplicate rows. + continue + } + // Condition 2: the aggregate function must be an AnyNotNullAgg, ConstAgg, + // ConstNotNullAgg, or FirstAgg that references a window function. + switch aggs[i].Agg.Op() { + case opt.AnyNotNullAggOp, opt.ConstAggOp, opt.ConstNotNullAggOp, opt.FirstAggOp: + // Ensure that the input to the aggregation is a direct reference to a + // window function, with no intervening logic. + ref, ok := aggs[i].Agg.Child(0).(*memo.VariableExpr) + if !ok { + return false + } + if !windowCols.Contains(ref.Col) { + return false + } + default: + return false + } + } + return true +} + +// MergeAggsAndWindow returns an AggregationsExpr that is equivalent to the +// combination of the given (outer) aggregations and (inner) window functions. +// ConstAgg-like outer aggregations that reference a window function are +// replaced with that window function. +// +// MergeAggsAndWindow will panic if CanMergeAggsAndWindow is false. It also +// expects the given window functions to all be aggregate functions. +func (c *CustomFuncs) MergeAggsAndWindow( + aggs memo.AggregationsExpr, windows memo.WindowsExpr, inputCols opt.ColSet, +) memo.AggregationsExpr { + // Create a mapping from column IDs to the window functions that produce them. + colsToWindowFuncs := map[opt.ColumnID]opt.ScalarExpr{} + for i := range windows { + colsToWindowFuncs[windows[i].Col] = windows[i].Function + } + newAggs := make(memo.AggregationsExpr, len(aggs)) + for i := range aggs { + aggCols := memo.ExtractAggInputColumns(aggs[i].Agg) + if aggCols.SubsetOf(inputCols) { + newAggs[i] = aggs[i] + continue + } + windowFunc := colsToWindowFuncs[aggCols.SingleColumn()] + newAggs[i] = c.f.ConstructAggregationsItem(windowFunc, aggs[i].Col) + } + return newAggs +} + // CanEliminateJoinUnderGroupByLeft returns true if the given join can be // eliminated and replaced by its left input. 
It should be called only when the // join is under a grouping operator that is only using columns from the join's diff --git a/pkg/sql/opt/norm/rules/groupby.opt b/pkg/sql/opt/norm/rules/groupby.opt index 824510ed567b..d87df2926771 100644 --- a/pkg/sql/opt/norm/rules/groupby.opt +++ b/pkg/sql/opt/norm/rules/groupby.opt @@ -587,3 +587,83 @@ (MergeAggs $innerAggs $outerAggs $innerGroupingCols) (MakeGrouping $outerGroupingCols (EmptyOrdering)) ) + +# FoldGroupByAndWindow merges a GroupBy operator with an input Window operator. +# This is possible when the following conditions are satisfied: +# +# 1. The GroupBy is unordered. This may not technically be necessary, but +# avoids complication in determining the correctness of ordering-sensitive +# aggregations. +# +# 2. The window function output cols are functionally determined by the +# partition-by cols. This means that the window function outputs the +# same value for every row in the partition (group). +# +# 3. The Window operator partition-by cols and grouping cols are the same. +# This ensures that an aggregate operator will act on the same set of rows, +# whether it is part of the Window operator or the GroupBy operator. +# +# 4. The window functions are all aggregate functions. This ensures they are +# compatible with GroupBy operators. +# +# 5. Finally, all of the GroupBy's aggregations must satisfy one of two cases: +# a. The aggregate only references cols from the Window operator's input. +# b. The aggregate is a ConstAgg (or ConstNotNull, AnyNotNull, or FirstAgg) +# that passes through the result of a window function. +# +# Assuming all of the above are satisfied, each GroupBy aggregate that only +# references the Window's input can be left alone (5a). Then, each ConstAgg +# referencing a window function can be replaced by that function (5b). 
+# +# Here's an example with slightly altered SQL syntax: +# +# SELECT max(b), const_agg(foo), const_agg(bar) +# FROM +# ( +# SELECT *, count(c) OVER w AS foo, array_agg(d) OVER w AS bar +# FROM abcd +# WINDOW w AS ( +# PARTITION BY a ORDER BY d +# RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +# ) +# ) +# GROUP BY a; +# => +# SELECT max(b), count(c), array_agg(d ORDER BY d) FROM abcd GROUP BY a; +# +# Note also that the Window's ordering should be preserved by the GroupBy to +# ensure that ordering-sensitive aggregates produce correct results. +[FoldGroupByAndWindow, Normalize] +(GroupBy | ScalarGroupBy + $window:(Window + $input:* + $windows:* & (WindowsAreAggregations $windows) + $windowPrivate:* + ) & + (ColsAreDeterminedBy + (WindowFuncOutputCols $windows) + $partitionByCols:(WindowPartition $windowPrivate) + $window + ) + $aggs:* & + (CanMergeAggsAndWindow + $aggs + $windows + $inputCols:(OutputCols $input) + ) + $groupingPrivate:* & + (IsUnorderedGrouping $groupingPrivate) & + (ColsAreEqual + $groupingCols:(GroupingCols $groupingPrivate) + $partitionByCols + ) +) +=> +((OpName) + $input + (MergeAggsAndWindow $aggs $windows $inputCols) + (MakeGrouping + (GroupingCols $groupingPrivate) + (WindowOrdering $windowPrivate) + ) +) diff --git a/pkg/sql/opt/norm/testdata/rules/groupby b/pkg/sql/opt/norm/testdata/rules/groupby index 2b4780392d3a..f8ed3b2e2b8c 100644 --- a/pkg/sql/opt/norm/testdata/rules/groupby +++ b/pkg/sql/opt/norm/testdata/rules/groupby @@ -4408,3 +4408,470 @@ scalar-group-by └── aggregations └── sum [as=sum:6, outer=(1)] └── a:1 + +# -------------------------------------------------- +# FoldGroupByAndWindow +# -------------------------------------------------- + +# Case with one partition column and an aggregate that references an input col. +# NOTE: the "foo" and "bar" grouping columns are simplified to ConstAgg. 
+norm expect=FoldGroupByAndWindow +SELECT sum(v), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + PARTITION BY u ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, foo, bar; +---- +project + ├── columns: sum:10!null foo:8!null bar:9!null + └── group-by (hash) + ├── columns: u:1!null count:8!null array_agg:9!null sum:10!null + ├── grouping columns: u:1!null + ├── internal-ordering: +4 opt(1) + ├── key: (1) + ├── fd: (1)-->(8-10) + ├── sort + │ ├── columns: u:1!null v:2!null z:4!null + │ ├── key: (1,2) + │ ├── fd: (1,2)-->(4) + │ ├── ordering: +4 opt(1) [actual: +4] + │ └── scan uvwz + │ ├── columns: u:1!null v:2!null z:4!null + │ ├── key: (1,2) + │ └── fd: (1,2)-->(4) + └── aggregations + ├── sum [as=sum:10, outer=(2)] + │ └── v:2 + ├── count-rows [as=count:8] + └── array-agg [as=array_agg:9, outer=(4)] + └── z:4 + +# Case with a ConstAgg referencing an input column. +norm expect=FoldGroupByAndWindow +SELECT sum(v), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo + FROM (SELECT *, 100 AS bar FROM uvwz) + WINDOW w AS ( + PARTITION BY u ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, foo, bar; +---- +project + ├── columns: sum:10!null foo:9!null bar:8!null + ├── fd: ()-->(8) + └── group-by (hash) + ├── columns: u:1!null bar:8!null count:9!null sum:10!null + ├── grouping columns: u:1!null + ├── internal-ordering: +4 opt(1,8) + ├── key: (1) + ├── fd: ()-->(8), (1)-->(8-10) + ├── project + │ ├── columns: bar:8!null u:1!null v:2!null z:4!null + │ ├── key: (1,2) + │ ├── fd: ()-->(8), (1,2)-->(4) + │ ├── ordering: +4 opt(1,8) [actual: +4] + │ ├── sort + │ │ ├── columns: u:1!null v:2!null z:4!null + │ │ ├── key: (1,2) + │ │ ├── fd: (1,2)-->(4) + │ │ ├── ordering: +4 opt(1) [actual: +4] + │ │ └── scan uvwz + │ │ ├── columns: u:1!null v:2!null z:4!null + │ │ ├── key: (1,2) + │ │ └── fd: (1,2)-->(4) + │ └── projections + │ └── 100 
[as=bar:8] + └── aggregations + ├── sum [as=sum:10, outer=(2)] + │ └── v:2 + ├── const-agg [as=bar:8, outer=(8)] + │ └── bar:8 + └── count-rows [as=count:9] + +# Case with no partition columns. +norm expect=FoldGroupByAndWindow +SELECT sum(v), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY foo, bar; +---- +group-by (streaming) + ├── columns: sum:10!null foo:8!null bar:9!null + ├── internal-ordering: +4 + ├── cardinality: [0 - 1] + ├── key: () + ├── fd: ()-->(8-10) + ├── sort + │ ├── columns: v:2!null z:4!null + │ ├── ordering: +4 + │ └── scan uvwz + │ └── columns: v:2!null z:4!null + └── aggregations + ├── sum [as=sum:10, outer=(2)] + │ └── v:2 + ├── count-rows [as=count:8] + └── array-agg [as=array_agg:9, outer=(4)] + └── z:4 + +# Case with multiple partition columns. +norm expect=FoldGroupByAndWindow +SELECT sum(v), foo, bar +FROM ( + SELECT *, count(v) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + PARTITION BY u, w ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, w, foo, bar; +---- +project + ├── columns: sum:10!null foo:8!null bar:9!null + └── group-by (hash) + ├── columns: u:1!null w:3!null count:8!null array_agg:9!null sum:10!null + ├── grouping columns: u:1!null w:3!null + ├── internal-ordering: +4 opt(1,3) + ├── key: (1,3) + ├── fd: (1,3)-->(8-10) + ├── sort + │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ ├── key: (2,3) + │ ├── fd: (1,2)-->(3,4), (2,3)-->(1,4) + │ ├── ordering: +4 opt(1,3) [actual: +4] + │ └── scan uvwz + │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ ├── key: (2,3) + │ └── fd: (1,2)-->(3,4), (2,3)-->(1,4) + └── aggregations + ├── sum [as=sum:10, outer=(2)] + │ └── v:2 + ├── count-rows [as=count:8] + └── array-agg [as=array_agg:9, outer=(4)] + └── z:4 + +# Case with grouping/partitioning on key columns. 
+norm expect=FoldGroupByAndWindow +SELECT sum(w), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + PARTITION BY u, v ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, v, foo, bar; +---- +project + ├── columns: sum:10!null foo:8!null bar:9!null + └── group-by (hash) + ├── columns: u:1!null v:2!null count:8!null array_agg:9!null sum:10!null + ├── grouping columns: u:1!null v:2!null + ├── key: (1,2) + ├── fd: (1,2)-->(8-10) + ├── scan uvwz + │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ ├── key: (2,3) + │ └── fd: (1,2)-->(3,4), (2,3)-->(1,4) + └── aggregations + ├── sum [as=sum:10, outer=(3)] + │ └── w:3 + ├── count-rows [as=count:8] + └── array-agg [as=array_agg:9, outer=(4)] + └── z:4 + +# No-op because of an ordered GroupBy. +norm expect-not=FoldGroupByAndWindow +SELECT array_agg(v), foo, bar +FROM ( + SELECT * FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + PARTITION BY u ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + ORDER BY v DESC +) +GROUP BY u, foo, bar; +---- +project + ├── columns: array_agg:10!null foo:8 bar:9 + └── group-by (hash) + ├── columns: u:1!null count:8 array_agg:9 array_agg:10!null + ├── grouping columns: u:1!null + ├── internal-ordering: -2 opt(1,8,9) + ├── key: (1) + ├── fd: (1)-->(8-10) + ├── sort + │ ├── columns: u:1!null v:2!null w:3!null z:4!null count:8 array_agg:9 + │ ├── key: (2,3) + │ ├── fd: (1,2)-->(3,4), (2,3)-->(1,4), (1)-->(8,9) + │ ├── ordering: -2 opt(1,8,9) [actual: -2] + │ └── window partition=(1) ordering=+4 opt(1) + │ ├── columns: u:1!null v:2!null w:3!null z:4!null count:8 array_agg:9 + │ ├── key: (2,3) + │ ├── fd: (1,2)-->(3,4), (2,3)-->(1,4), (1)-->(8,9) + │ ├── scan uvwz + │ │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ │ ├── key: (2,3) + │ │ └── fd: (1,2)-->(3,4), (2,3)-->(1,4) + │ └── windows + │ ├── count [as=count:8, 
frame="rows from unbounded to unbounded", outer=(3)] + │ │ └── w:3 + │ └── array-agg [as=array_agg:9, frame="rows from unbounded to unbounded", outer=(4)] + │ └── z:4 + └── aggregations + ├── array-agg [as=array_agg:10, outer=(2)] + │ └── v:2 + ├── const-agg [as=count:8, outer=(8)] + │ └── count:8 + └── const-agg [as=array_agg:9, outer=(9)] + └── array_agg:9 + +# No-op because the grouping columns reference a window function. +norm expect-not=FoldGroupByAndWindow +SELECT sum(v), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar, row_number() OVER w AS baz + FROM uvwz + WINDOW w AS ( + PARTITION BY u ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, foo, bar, baz; +---- +project + ├── columns: sum:11!null foo:8 bar:9 + └── group-by (hash) + ├── columns: u:1!null count:8 array_agg:9 row_number:10 sum:11!null + ├── grouping columns: u:1!null row_number:10 + ├── key: (1,10) + ├── fd: (1)-->(8,9), (1,10)-->(8,9,11) + ├── window partition=(1) ordering=+4 opt(1) + │ ├── columns: u:1!null v:2!null w:3!null z:4!null count:8 array_agg:9 row_number:10 + │ ├── key: (2,3) + │ ├── fd: (1,2)-->(3,4), (2,3)-->(1,4,10), (1)-->(8,9) + │ ├── scan uvwz + │ │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ │ ├── key: (2,3) + │ │ └── fd: (1,2)-->(3,4), (2,3)-->(1,4) + │ └── windows + │ ├── count [as=count:8, frame="rows from unbounded to unbounded", outer=(3)] + │ │ └── w:3 + │ ├── array-agg [as=array_agg:9, frame="rows from unbounded to unbounded", outer=(4)] + │ │ └── z:4 + │ └── row-number [as=row_number:10, frame="rows from unbounded to unbounded"] + └── aggregations + ├── sum [as=sum:11, outer=(2)] + │ └── v:2 + ├── const-agg [as=count:8, outer=(8)] + │ └── count:8 + └── const-agg [as=array_agg:9, outer=(9)] + └── array_agg:9 + +# No-op because the grouping columns are not the same as the partition columns. 
+norm expect-not=FoldGroupByAndWindow +SELECT sum(u), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + PARTITION BY u ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY v, foo, bar; +---- +project + ├── columns: sum:10!null foo:8 bar:9 + └── group-by (hash) + ├── columns: v:2!null count:8 array_agg:9 sum:10!null + ├── grouping columns: v:2!null count:8 array_agg:9 + ├── key: (2,8,9) + ├── fd: (2,8,9)-->(10) + ├── window partition=(1) ordering=+4 opt(1) + │ ├── columns: u:1!null v:2!null w:3!null z:4!null count:8 array_agg:9 + │ ├── key: (2,3) + │ ├── fd: (1,2)-->(3,4), (2,3)-->(1,4), (1)-->(8,9) + │ ├── scan uvwz + │ │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ │ ├── key: (2,3) + │ │ └── fd: (1,2)-->(3,4), (2,3)-->(1,4) + │ └── windows + │ ├── count [as=count:8, frame="rows from unbounded to unbounded", outer=(3)] + │ │ └── w:3 + │ └── array-agg [as=array_agg:9, frame="rows from unbounded to unbounded", outer=(4)] + │ └── z:4 + └── aggregations + └── sum [as=sum:10, outer=(1)] + └── u:1 + +# No-op because the grouping columns are not the same as the partition columns. 
+norm expect-not=FoldGroupByAndWindow +SELECT sum(v), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, foo, bar; +---- +project + ├── columns: sum:10!null foo:8 bar:9 + ├── fd: ()-->(8,9) + └── group-by (hash) + ├── columns: u:1!null count:8 array_agg:9 sum:10!null + ├── grouping columns: u:1!null + ├── key: (1) + ├── fd: ()-->(8,9), (1)-->(8-10) + ├── window partition=() ordering=+4 + │ ├── columns: u:1!null v:2!null w:3!null z:4!null count:8 array_agg:9 + │ ├── key: (2,3) + │ ├── fd: ()-->(8,9), (1,2)-->(3,4), (2,3)-->(1,4) + │ ├── scan uvwz + │ │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ │ ├── key: (2,3) + │ │ └── fd: (1,2)-->(3,4), (2,3)-->(1,4) + │ └── windows + │ ├── count [as=count:8, frame="rows from unbounded to unbounded", outer=(3)] + │ │ └── w:3 + │ └── array-agg [as=array_agg:9, frame="rows from unbounded to unbounded", outer=(4)] + │ └── z:4 + └── aggregations + ├── sum [as=sum:10, outer=(2)] + │ └── v:2 + ├── const-agg [as=count:8, outer=(8)] + │ └── count:8 + └── const-agg [as=array_agg:9, outer=(9)] + └── array_agg:9 + +# No-op because the window frame is not unbounded. 
+norm expect-not=FoldGroupByAndWindow +SELECT sum(v), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + PARTITION BY u ORDER BY z + ) +) +GROUP BY u, foo, bar; +---- +project + ├── columns: sum:10!null foo:8 bar:9 + └── group-by (hash) + ├── columns: u:1!null count:8 array_agg:9 sum:10!null + ├── grouping columns: u:1!null count:8 array_agg:9 + ├── key: (1,8,9) + ├── fd: (1,8,9)-->(10) + ├── window partition=(1) ordering=+4 opt(1) + │ ├── columns: u:1!null v:2!null w:3!null z:4!null count:8 array_agg:9 + │ ├── key: (2,3) + │ ├── fd: (1,2)-->(3,4), (2,3)-->(1,4,8,9) + │ ├── scan uvwz + │ │ ├── columns: u:1!null v:2!null w:3!null z:4!null + │ │ ├── key: (2,3) + │ │ └── fd: (1,2)-->(3,4), (2,3)-->(1,4) + │ └── windows + │ ├── count [as=count:8, outer=(3)] + │ │ └── w:3 + │ └── array-agg [as=array_agg:9, outer=(4)] + │ └── z:4 + └── aggregations + └── sum [as=sum:10, outer=(2)] + └── v:2 + +# No-op because the (non ConstAgg-like) Sum GroupBy aggregate references a +# Window aggregate. 
+norm expect-not=FoldGroupByAndWindow +SELECT sum(foo), foo, bar +FROM ( + SELECT *, count(w) OVER w AS foo, array_agg(z) OVER w AS bar + FROM uvwz + WINDOW w AS ( + PARTITION BY u ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, foo, bar; +---- +project + ├── columns: sum:10 foo:8 bar:9 + └── group-by (hash) + ├── columns: u:1!null count:8 array_agg:9 sum:10 + ├── grouping columns: u:1!null + ├── key: (1) + ├── fd: (1)-->(8-10) + ├── window partition=(1) ordering=+4 opt(1) + │ ├── columns: u:1!null w:3!null z:4!null count:8 array_agg:9 + │ ├── fd: (1)-->(8,9) + │ ├── scan uvwz + │ │ └── columns: u:1!null w:3!null z:4!null + │ └── windows + │ ├── count [as=count:8, frame="rows from unbounded to unbounded", outer=(3)] + │ │ └── w:3 + │ └── array-agg [as=array_agg:9, frame="rows from unbounded to unbounded", outer=(4)] + │ └── z:4 + └── aggregations + ├── sum [as=sum:10, outer=(8)] + │ └── count:8 + ├── const-agg [as=count:8, outer=(8)] + │ └── count:8 + └── const-agg [as=array_agg:9, outer=(9)] + └── array_agg:9 + +# No-op case with row_number, which can produce a different result for each row +# in a window frame. 
+norm expect-not=FoldGroupByAndWindow +SELECT sum(v), foo +FROM ( + SELECT *, row_number() OVER w AS foo + FROM uvwz + WINDOW w AS ( + PARTITION BY u ORDER BY z + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +GROUP BY u, foo; +---- +project + ├── columns: sum:9!null foo:8 + └── group-by (hash) + ├── columns: u:1!null row_number:8 sum:9!null + ├── grouping columns: u:1!null row_number:8 + ├── key: (1,8) + ├── fd: (1,8)-->(9) + ├── window partition=(1) ordering=+4 opt(1) + │ ├── columns: u:1!null v:2!null z:4!null row_number:8 + │ ├── key: (1,2) + │ ├── fd: (1,2)-->(4,8) + │ ├── scan uvwz + │ │ ├── columns: u:1!null v:2!null z:4!null + │ │ ├── key: (1,2) + │ │ └── fd: (1,2)-->(4) + │ └── windows + │ └── row-number [as=row_number:8, frame="rows from unbounded to unbounded"] + └── aggregations + └── sum [as=sum:9, outer=(2)] + └── v:2 diff --git a/pkg/sql/opt/norm/testdata/rules/prune_cols b/pkg/sql/opt/norm/testdata/rules/prune_cols index 48a8bdddd47b..d0fdeab60191 100644 --- a/pkg/sql/opt/norm/testdata/rules/prune_cols +++ b/pkg/sql/opt/norm/testdata/rules/prune_cols @@ -5382,47 +5382,39 @@ SELECT concat_agg(tab_1739.col3_7::STRING ORDER BY tab_1739.col3_7 DESC)::STRING FROM table86308@[0] AS tab_1739 GROUP BY tab_1739.col3_7 HAVING bool_and(false::BOOL)::BOOL ---- project - └── group-by (hash) - ├── select - │ ├── window partition=(8) - │ │ ├── window partition=(8) - │ │ │ ├── project - │ │ │ │ ├── scan table86308 - │ │ │ │ │ ├── computed column expressions - │ │ │ │ │ │ ├── col3_9 - │ │ │ │ │ │ │ └── col3_3 + 0.7530620098114014 - │ │ │ │ │ │ ├── col3_10 - │ │ │ │ │ │ │ └── lower(col3_0::STRING) - │ │ │ │ │ │ ├── col3_11 - │ │ │ │ │ │ │ └── col3_3 + 0.8790965676307678 - │ │ │ │ │ │ ├── col3_12 - │ │ │ │ │ │ │ └── lower(col3_0::STRING) - │ │ │ │ │ │ ├── col3_13 - │ │ │ │ │ │ │ └── lower(col3_6::STRING) - │ │ │ │ │ │ ├── col3_14 - │ │ │ │ │ │ │ └── col3_3 + -0.27059364318847656 - │ │ │ │ │ │ └── col3_15 - │ │ │ │ │ │ └── lower(col3_5::STRING) - │ │ 
│ │ │ └── partial index predicates - │ │ │ │ │ └── table3_col3_4_col3_11_col3_1_col3_0_col3_3_key: filters - │ │ │ │ │ ├── ((col3_9 = 5e-324) OR (col3_1 = -32768)) OR (col3_15 = '') - │ │ │ │ │ ├── col3_3 > 3.4028234663852886e+38 - │ │ │ │ │ └── lower(col3_0::STRING) != '""' - │ │ │ │ └── projections - │ │ │ │ ├── false - │ │ │ │ ├── col3_3 + 0.8790965676307678 - │ │ │ │ └── lower(col3_0::STRING) - │ │ │ └── windows - │ │ │ └── concat-agg [frame="range from unbounded to unbounded"] - │ │ │ └── col3_7 - │ │ └── windows - │ │ └── bool-and [frame="range from unbounded to unbounded"] - │ │ └── column21 - │ └── filters - │ └── bool_and - └── aggregations - └── const-agg - └── concat_agg + └── select + ├── group-by (hash) + │ ├── project + │ │ ├── scan table86308 + │ │ │ ├── computed column expressions + │ │ │ │ ├── col3_9 + │ │ │ │ │ └── col3_3 + 0.7530620098114014 + │ │ │ │ ├── col3_10 + │ │ │ │ │ └── lower(col3_0::STRING) + │ │ │ │ ├── col3_11 + │ │ │ │ │ └── col3_3 + 0.8790965676307678 + │ │ │ │ ├── col3_12 + │ │ │ │ │ └── lower(col3_0::STRING) + │ │ │ │ ├── col3_13 + │ │ │ │ │ └── lower(col3_6::STRING) + │ │ │ │ ├── col3_14 + │ │ │ │ │ └── col3_3 + -0.27059364318847656 + │ │ │ │ └── col3_15 + │ │ │ │ └── lower(col3_5::STRING) + │ │ │ └── partial index predicates + │ │ │ └── table3_col3_4_col3_11_col3_1_col3_0_col3_3_key: filters + │ │ │ ├── ((col3_9 = 5e-324) OR (col3_1 = -32768)) OR (col3_15 = '') + │ │ │ ├── col3_3 > 3.4028234663852886e+38 + │ │ │ └── lower(col3_0::STRING) != '""' + │ │ └── projections + │ │ └── false + │ └── aggregations + │ ├── concat-agg + │ │ └── col3_7 + │ └── bool-and + │ └── column21 + └── filters + └── bool_and exec-ddl CREATE TABLE p100478 ( diff --git a/pkg/sql/opt/norm/window_funcs.go b/pkg/sql/opt/norm/window_funcs.go index 783a7e316e6c..61a2050c10db 100644 --- a/pkg/sql/opt/norm/window_funcs.go +++ b/pkg/sql/opt/norm/window_funcs.go @@ -197,3 +197,24 @@ func (c *CustomFuncs) LimitToRowNumberFilter( ), } } + +// WindowsAreAggregations 
returns true if all the window functions are aggregate +// functions. +func (c *CustomFuncs) WindowsAreAggregations(windows memo.WindowsExpr) bool { + for i := range windows { + if !opt.IsAggregateOp(windows[i].Function) { + return false + } + } + return true +} + +// WindowFuncOutputCols collects all columns projected by the given set of +// window functions. +func (c *CustomFuncs) WindowFuncOutputCols(windows memo.WindowsExpr) opt.ColSet { + var cols opt.ColSet + for i := range windows { + cols.Add(windows[i].Col) + } + return cols +}