Skip to content

Commit

Permalink
Merge #139388
Browse files Browse the repository at this point in the history
139388: opt/memo: silence error in factorOutVirtualCols for mutation columns r=yuzefovich,fqazi a=michae2

Mutation columns (such as those being dropped by an ALTER TABLE DROP COLUMN statement) exist in table metadata, but might not necessarily have an associated computed column expression. This is because `optbuilder.(*Builder).addComputedColsForTable` usually skips over mutation columns.

In `memo.(*statisticsBuilder).factorOutVirtualCols` we gather all the expressions for virtual computed columns of the table. We were failing an assertion if we couldn't find the expression for a column. This is fine, however, because `factorOutVirtualCols` only powers an optimization in statistics builder and is not necessary for correctness.

This commit silences the assertion if the column is a mutation column, and futhermore, only checks it in test builds otherwise, because ideally we wouldn't want to miss the optimization for non-mutation columns.

Fixes: #129405
Fixes: #138147
Fixes: #138809

Release note (bug fix): Fix a rare bug in which a query might fail with error "could not find computed column expression for column in table" while dropping a virtual computed column from the table. This bug was introduced in v23.2.4.

Co-authored-by: Michael Erickson <[email protected]>
  • Loading branch information
craig[bot] and michae2 committed Jan 25, 2025
2 parents 49e62ff + d9eebcf commit 7d18e50
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 4 deletions.
80 changes: 79 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -3318,8 +3318,14 @@ mutation {crdb_internal_a_shard_3} 10 true
statement ok
SET CLUSTER SETTING jobs.debug.pausepoints = ''

let $job
SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY

statement ok
RESUME JOB $job

statement ok
RESUME JOB (SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t118537 ALTER PRIMARY KEY USING COLUMNS (a) USING HASH' AND status = 'paused' FETCH FIRST 1 ROWS ONLY)
SHOW JOB WHEN COMPLETE $job

# Test optimizer_use_virtual_computed_column_stats.

Expand Down Expand Up @@ -3488,6 +3494,78 @@ upper_bound range_rows distinct_range_rows equal_rows
'dispatched' 0 0 1
'delivered' 0 0 1

# Regression test for #138809: check that we can still use stats on virtual
# computed columns after dropping the column.

statement ok
CREATE TABLE t138809 (
a INT PRIMARY KEY,
b INT AS (a + 1) VIRTUAL,
c INT AS (a + 2) VIRTUAL
)

statement ok
INSERT INTO t138809 (a) VALUES (1), (2)

statement ok
ANALYZE t138809

query TIIIIB colnames
SELECT column_names, row_count, null_count, distinct_count, avg_size, histogram_id IS NOT NULL AS has_histogram
FROM [SHOW STATISTICS FOR TABLE t138809]
ORDER BY column_names::STRING
----
column_names row_count null_count distinct_count avg_size has_histogram
{a} 2 0 2 8 true
{b} 2 0 2 8 true
{c} 2 0 2 8 true

query T
EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1
----
distribution: full
vectorized: true
·
• group (scalar)
│ estimated row count: 1
└── • scan
estimated row count: 2 (100% of the table; stats collected <hidden> ago)
table: t138809@t138809_pkey
spans: [/1 - ]

statement ok
SET CLUSTER SETTING jobs.debug.pausepoints = 'schemachanger.root..1'

statement error job \d+ was paused before it completed with reason: pause point "schemachanger\.root\.\.1" hit
ALTER TABLE t138809 DROP COLUMN c

query T
EXPLAIN SELECT count(*) FROM t138809 WHERE b > 1
----
distribution: full
vectorized: true
·
• group (scalar)
│ estimated row count: 1
└── • scan
estimated row count: 2 (100% of the table; stats collected <hidden> ago)
table: t138809@t138809_pkey
spans: [/1 - ]

statement ok
SET CLUSTER SETTING jobs.debug.pausepoints = DEFAULT

let $job
SELECT job_id FROM crdb_internal.jobs WHERE description LIKE 'ALTER TABLE %t138809 DROP COLUMN c' AND status LIKE 'pause%' FETCH FIRST 1 ROWS ONLY

statement ok
RESUME JOB $job

statement ok
SHOW JOB WHEN COMPLETE $job

# Verify that partial stats are collected on single column prefixes of forward
# indexes and skips over partial, sharded, and implicitly partitioned indexes
# when columns are unspecified.
Expand Down
13 changes: 10 additions & 3 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -5254,9 +5254,16 @@ func (sb *statisticsBuilder) factorOutVirtualCols(
}
expr, ok := tab.ComputedCols[colID]
if !ok {
panic(errors.AssertionFailedf(
"could not find computed column expression for column %v in table %v", colID, tab.Alias,
))
// If we can't find the computed column expression, this is probably a
// mutation column. Whatever the reason, it's safe to skip: we simply
// won't factor out matching expressions.
if buildutil.CrdbTestBuild &&
!tab.Table.Column(tab.MetaID.ColumnOrdinal(colID)).IsMutation() {
panic(errors.AssertionFailedf(
"could not find computed column expression for column %v in table %v", colID, tab.Alias,
))
}
return
}
virtExprs = append(virtExprs, virtExpr{colID: colID, expr: expr})
})
Expand Down

0 comments on commit 7d18e50

Please sign in to comment.