Skip to content

Commit

Permalink
opt: infer functional dependencies for window functions
Browse files Browse the repository at this point in the history
This commit adds logic to infer strict functional dependencies from
a Window operator's partition column(s) to some or all of its window
functions when the following conditions are satisfied:
1. The window function must be an aggregate, first_value, or last_value.
2. The window frame must be unbounded, i.e. it must extend from UNBOUNDED
   PRECEDING to UNBOUNDED FOLLOWING with no frame exclusion.

The above conditions ensure that the window function always produces the
same result given the same window frame, as well as that every row in a
partition has the same window frame. This means that the window function
produces the same output for every row in the partition, and therefore,
the partition columns functionally determine the output of the window
function.

This patch also fixes a small omission made in the window function FD
calculation, which caused the window's FDs to preserve an input key
without extending the key to apply to all the window's output cols.

Epic: None

Release note: None
  • Loading branch information
DrewKimball committed Dec 12, 2024
1 parent 60c5e14 commit 6a3221f
Show file tree
Hide file tree
Showing 13 changed files with 465 additions and 79 deletions.
15 changes: 15 additions & 0 deletions pkg/sql/colexec/colexecwindow/window_aggregator.eg.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions pkg/sql/colexec/colexecwindow/window_aggregator_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,11 @@ func (a *slidingWindowAggregator) processBatch(batch coldata.Batch, startIdx, en
})
}

// INVARIANT: the rows within a window frame are always processed in the same
// order, regardless of whether the user specified an ordering. This means that
// two rows with the exact same frame will produce the same result for a given
// aggregation.
//
// execgen:inline
// execgen:template<removeRows>
func aggregateOverIntervals(intervals []windowInterval, removeRows bool) {
Expand Down
53 changes: 52 additions & 1 deletion pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/cast"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree/treewindow"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
"github.com/cockroachdb/cockroach/pkg/util/intsets"
Expand Down Expand Up @@ -1395,8 +1396,22 @@ func (b *logicalPropsBuilder) buildWindowProps(window *WindowExpr, rel *props.Re
// examples include:
// * row_number+the partition is a key.
// * rank is determined by the partition and the value being ordered by.
// * aggregations/first_value/last_value are determined by the partition.
rel.FuncDeps.CopyFrom(&inputProps.FuncDeps)
if inputProps.FuncDeps.ColsAreStrictKey(window.Partition) {
// Special case: when the partition columns form a strict key over the
// input, each partition will only have a single row. Therefore, the window
// function output columns are trivially determined by the partition cols.
rel.FuncDeps.AddStrictKey(window.Partition, rel.OutputCols)
} else {
// It may still be possible to infer functional dependencies based on the
// window frames and window function types.
determinedCols := getWindowPartitionDeps(window, &inputProps.FuncDeps)
if !determinedCols.Empty() {
// The partition columns determine some of the window function outputs.
rel.FuncDeps.AddStrictDependency(window.Partition, determinedCols)
}
}
rel.FuncDeps.ProjectCols(rel.OutputCols)

// Cardinality
// -----------
Expand Down Expand Up @@ -2971,3 +2986,39 @@ func CanBeCompositeSensitive(e opt.Expr) bool {
isCompositeInsensitive, _ := check(e)
return !isCompositeInsensitive
}

// getWindowPartitionDeps collects the output columns of the window functions
// whose results are functionally determined by the Window operator's
// partition columns (the partition column set may be empty). A window function
// qualifies when both of the following hold:
//
//  1. Its frame has no exclusion and runs from UNBOUNDED PRECEDING to
//     UNBOUNDED FOLLOWING, so every row in a partition sees the same frame.
//  2. It is an aggregate, first_value, or last_value, which yield the same
//     result for any row given the same frame.
//
// The inputFDs parameter is not consulted by the current implementation.
//
// NOTE: this relies on execution performing aggregation in the same order for
// every row in the window, even when there is no explicit ORDER BY.
func getWindowPartitionDeps(window *WindowExpr, inputFDs *props.FuncDepSet) opt.ColSet {
	var partitionDetermined opt.ColSet
	for idx := range window.Windows {
		w := &window.Windows[idx]

		// The frame must cover the whole partition; otherwise different rows of
		// the same partition could observe different frames.
		coversPartition := w.Frame.FrameExclusion == treewindow.NoExclusion &&
			w.Frame.StartBoundType == treewindow.UnboundedPreceding &&
			w.Frame.EndBoundType == treewindow.UnboundedFollowing
		if !coversPartition {
			continue
		}

		// Only aggregates, first_value, and last_value are treated as producing
		// a single result per frame.
		fn := w.Function
		frameDetermined := opt.IsAggregateOp(fn)
		if !frameDetermined {
			op := fn.Op()
			frameDetermined = op == opt.FirstValueOp || op == opt.LastValueOp
		}
		if !frameDetermined {
			continue
		}

		// Same frame for every row of the partition plus a result that depends
		// only on the frame means the partition columns determine this output
		// column.
		partitionDetermined.Add(w.Col)
	}
	return partitionDetermined
}
Loading

0 comments on commit 6a3221f

Please sign in to comment.