From d9eebcfc0a9f6b853294a0d35235bb98af9c11f4 Mon Sep 17 00:00:00 2001 From: Michael Erickson Date: Fri, 17 Jan 2025 16:48:36 -0800 Subject: [PATCH] opt/memo: silence error in factorOutVirtualCols for mutation columns Mutation columns (such as those being dropped by an ALTER TABLE DROP COLUMN statement) exist in table metadata, but might not necessarily have an associated computed column expression. This is because `optbuilder.(*Builder).addComputedColsForTable` usually skips over mutation columns. In `memo.(*statisticsBuilder).factorOutVirtualCols` we gather all the expressions for virtual computed columns of the table. We were failing an assertion if we couldn't find the expression for a column. This is fine, however, because `factorOutVirtualCols` only powers an optimization in statistics builder and is not necessary for correctness. This commit silences the assertion if the column is a mutation column, and futhermore, only checks it in test builds otherwise, because ideally we wouldn't want to miss the optimization for non-mutation columns. Fixes: #129405 Fixes: #138147 Fixes: #138809 Release note (bug fix): Fix a rare bug in which a query might fail with error "could not find computed column expression for column in table" while dropping a virtual computed column from the table. This bug was introduced in v23.2.4. --- .../testdata/logic_test/distsql_stats | 80 ++++++++++++++++++- pkg/sql/opt/memo/statistics_builder.go | 13 ++- 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index 6fff2c28062d..7e6060007320 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -3318,8 +3318,14 @@ mutation {crdb_internal_a_shard_3} 10 true statement ok SET CLUSTER SETTING jobs.debug.pausepoints = '' +let $job +SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY + +statement ok +RESUME JOB $job + statement ok -RESUME JOB (SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status = 'paused' FETCH FIRST 1 ROWS ONLY) +SHOW JOB WHEN COMPLETE $job # Test optimizer_use_virtual_computed_column_stats. @@ -3488,6 +3494,78 @@ upper_bound range_rows distinct_range_rows equal_rows 'dispatched' 0 0 1 'delivered' 0 0 1 +# Regression test for #138809: check that we can still use stats on virtual +# computed columns after dropping the column. + +statement ok +CREATE TABLE t138809 ( + a INT PRIMARY KEY, + b INT AS (a + 1) VIRTUAL, + c INT AS (a + 2) VIRTUAL +) + +statement ok +INSERT INTO t138809 (a) VALUES (1), (2) + +statement ok +ANALYZE t138809 + +query TIIIIB colnames +SELECT column_names, row_count, null_count, distinct_count, avg_size, histogram_id IS NOT NULL AS has_histogram +FROM [SHOW STATISTICS FOR TABLE t138809] +ORDER BY column_names::STRING +---- +column_names row_count null_count distinct_count avg_size has_histogram +{a} 2 0 2 8 true +{b} 2 0 2 8 true +{c} 2 0 2 8 true + +query T +EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1 +---- +distribution: full +vectorized: true +· +• group (scalar) +│ estimated row count: 1 +│ +└── • scan + estimated row count: 2 (100% of the table; stats collected ago) + table: t138809@t138809_pkey + spans: [/1 - ] + +statement ok +SET CLUSTER SETTING jobs.debug.pausepoints = 'schemachanger.root..1' + +statement error job \d+ was paused before it completed with reason: pause point "schemachanger\.root\.\.1" hit +ALTER TABLE t138809 DROP COLUMN c + +query T +EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1 +---- +distribution: full +vectorized: true +· +• group (scalar) +│ estimated row count: 1 +│ +└── • scan + estimated row count: 2 (100% of the table; stats collected ago) + table: t138809@t138809_pkey + spans: [/1 - ] + +statement ok +SET CLUSTER SETTING jobs.debug.pausepoints = DEFAULT + +let $job +SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t138809 DROP COLUMN c' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY + +statement ok +RESUME JOB $job + +statement ok +SHOW JOB WHEN COMPLETE $job + # Verify that partial stats are collected on single column prefixes of forward # indexes and skips over partial, sharded, and implicitly partitioned indexes # when columns are unspecified. diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index 703f1d840ca5..d050cd579bf1 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -5254,9 +5254,16 @@ func (sb *statisticsBuilder) factorOutVirtualCols( } expr, ok := tab.ComputedCols[colID] if !ok { - panic(errors.AssertionFailedf( - "could not find computed column expression for column %v in table %v", colID, tab.Alias, - )) + // If we can't find the computed column expression, this is probably a + // mutation column. Whatever the reason, it's safe to skip: we simply + // won't factor out matching expressions. + if buildutil.CrdbTestBuild && + !tab.Table.Column(tab.MetaID.ColumnOrdinal(colID)).IsMutation() { + panic(errors.AssertionFailedf( + "could not find computed column expression for column %v in table %v", colID, tab.Alias, + )) + } + return } virtExprs = append(virtExprs, virtExpr{colID: colID, expr: expr}) })