Skip to content

Commit

Permalink
Merge #121171
Browse files Browse the repository at this point in the history
121171: opt/memo: fix optimizer_use_virtual_computed_column_stats r=DrewKimball a=michae2

With optimizer_use_virtual_computed_column_stats set to false, constrained scans were still sometimes using stats on virtual computed columns. This commit adds a check to makeTableStatistics that, when the setting is false, skips any table statistic referencing a virtual computed column, so no column statistics are created from it. This is a stronger check than existed before.

With this check, the VirtualCols sets will always be empty when optimizer_use_virtual_computed_column_stats is false.

Informs: #68254

Epic: CRDB-8949

Release note: None

Co-authored-by: Michael Erickson <[email protected]>
  • Loading branch information
craig[bot] and michae2 committed Mar 27, 2024
2 parents 87ed0b2 + 579d42e commit 6b662e3
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 0 deletions.
98 changes: 98 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -2777,3 +2777,101 @@ SET CLUSTER SETTING jobs.debug.pausepoints = ''

statement ok
RESUME JOB (SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status = 'paused' FETCH FIRST 1 ROWS ONLY)

# Test optimizer_use_virtual_computed_column_stats.

statement ok
CREATE TABLE mno (
m int NOT NULL,
n int,
o int AS (sqrt(m::float)::int) VIRTUAL,
PRIMARY KEY (m),
INDEX (n),
INDEX (o) STORING (n)
)

statement ok
INSERT INTO mno (m, n) SELECT i, i % 50 FROM generate_series(0, 999) s(i)

statement ok
ANALYZE mno

query TTIIB
SELECT statistics_name, column_names, row_count, distinct_count, histogram_id IS NOT NULL AS has_histogram
FROM [SHOW STATISTICS FOR TABLE mno]
ORDER BY statistics_name, column_names::STRING
----
NULL {m} 1000 1000 true
NULL {n} 1000 50 true
NULL {o} 1000 33 true

query T
EXPLAIN SELECT * FROM mno WHERE n = 1 AND o = 9
----
distribution: full
vectorized: true
·
• render
└── • filter
│ estimated row count: 1
│ filter: n = 1
└── • scan
estimated row count: 18 (1.8% of the table; stats collected <hidden> ago)
table: mno@mno_o_idx
spans: [/9 - /9]

query T
EXPLAIN SELECT * FROM mno WHERE n = 1 AND o = 11
----
distribution: full
vectorized: true
·
• render
└── • filter
│ estimated row count: 1
│ filter: sqrt(m::FLOAT8)::INT8 = 11
└── • scan
estimated row count: 20 (2.0% of the table; stats collected <hidden> ago)
table: mno@mno_n_idx
spans: [/1 - /1]

statement ok
SET optimizer_use_virtual_computed_column_stats = false

query T
EXPLAIN SELECT * FROM mno WHERE n = 1 AND o = 9
----
distribution: full
vectorized: true
·
• render
└── • filter
│ estimated row count: 7
│ filter: n = 1
└── • scan
estimated row count: 10 (1.0% of the table; stats collected <hidden> ago)
table: mno@mno_o_idx
spans: [/9 - /9]

query T
EXPLAIN SELECT * FROM mno WHERE n = 1 AND o = 11
----
distribution: full
vectorized: true
·
• render
└── • filter
│ estimated row count: 7
│ filter: n = 1
└── • scan
estimated row count: 10 (1.0% of the table; stats collected <hidden> ago)
table: mno@mno_o_idx
spans: [/11 - /11]
4 changes: 4 additions & 0 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati

// Add all the column statistics, using the most recent statistic for each
// column set. Stats are ordered with most recent first.
EachStat:
for i := first; i < tab.StatisticCount(); i++ {
stat := tab.Statistic(i)
if stat.IsPartial() {
Expand All @@ -678,6 +679,9 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati
col := tabID.ColumnID(colOrd)
cols.Add(col)
if tab.Column(colOrd).IsVirtualComputed() {
if !sb.evalCtx.SessionData().OptimizerUseVirtualComputedColumnStats {
continue EachStat
}
// We only add virtual columns if we have statistics on them, so that
// in higher groups we can decide whether to look up statistics on
// virtual columns or on the columns used in their defining
Expand Down

0 comments on commit 6b662e3

Please sign in to comment.