Skip to content

Commit

Permalink
opt,sql: support hint to avoid full scan
Browse files Browse the repository at this point in the history
Informs #79683

Release note (sql change): Added support for a new index hint,
AVOID_FULL_SCAN, which will prevent the optimizer from planning a
full scan for the specified table if any other plan is possible. The
hint can be used in the same way as other existing index hints. For
example, SELECT * FROM table_name@{AVOID_FULL_SCAN};. Note that a
full scan of a partial index may still be preferred by the optimizer,
unless AVOID_FULL_SCAN is used in combination with a specific partial
index via FORCE_INDEX=index_name. This hint is similar to NO_FULL_SCAN,
but will not error if a full scan cannot be avoided.
  • Loading branch information
rytaft committed Dec 19, 2024
1 parent 991262e commit 48274d6
Show file tree
Hide file tree
Showing 19 changed files with 313 additions and 24 deletions.
19 changes: 19 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/select
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,25 @@ SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,NO_FULL_SCAN} WHERE b > 0
statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,NO_FULL_SCAN} WHERE a > 0 AND b = 1

# Now avoid full scans with a hint. A full scan should not cause an error.
statement ok
SELECT * FROM t_disallow_scans@{AVOID_FULL_SCAN}

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,AVOID_FULL_SCAN}

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,AVOID_FULL_SCAN} WHERE a > 0

statement ok
SELECT * FROM t_disallow_scans@{AVOID_FULL_SCAN} WHERE a > 0

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,AVOID_FULL_SCAN} WHERE b > 0

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,AVOID_FULL_SCAN} WHERE a > 0 AND b = 1

# Now disable full scans with the session variable.
statement ok
SET disallow_full_table_scans = true;
Expand Down
30 changes: 30 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/delete
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,36 @@ vectorized: true
spans: /1-/1000 /2001-/3000
locking strength: for update

# AVOID_FULL_SCAN also works to ensure a constrained scan.
query T
EXPLAIN (VERBOSE) DELETE FROM xyz@{AVOID_FULL_SCAN} WHERE (y > 0 AND y < 1000) OR (y > 2000 AND y < 3000) RETURNING z
----
distribution: local
vectorized: true
·
• project
│ columns: (z)
└── • delete
│ columns: (x, z)
│ estimated row count: 990 (missing stats)
│ from: xyz
│ auto commit
└── • index join
│ columns: (x, y, z)
│ estimated row count: 990 (missing stats)
│ table: xyz@xyz_pkey
│ key columns: x
│ locking strength: for update
└── • scan
columns: (x, y)
estimated row count: 990 (missing stats)
table: xyz@xyz_y_idx
spans: /1-/1000 /2001-/3000
locking strength: for update

# Testcase for issue 105803.

statement ok
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/explain_redact
Original file line number Diff line number Diff line change
Expand Up @@ -1156,7 +1156,7 @@ update ab
query T
EXPLAIN (OPT, MEMO, REDACT) UPDATE ab SET a = a || 'ab' WHERE a > 'a'
----
memo (optimized, ~13KB, required=[presentation: info:15] [distribution: test])
memo (optimized, ~14KB, required=[presentation: info:15] [distribution: test])
├── G1: (explain G2 [distribution: test])
│ └── [presentation: info:15] [distribution: test]
│ ├── best: (explain G2="[distribution: test]" [distribution: test])
Expand Down Expand Up @@ -3825,7 +3825,7 @@ project
query T
EXPLAIN (OPT, MEMO, REDACT) SELECT min(c || 'cccc') OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING EXCLUDE CURRENT ROW) FROM cd
----
memo (optimized, ~11KB, required=[presentation: info:10] [distribution: test])
memo (optimized, ~12KB, required=[presentation: info:10] [distribution: test])
├── G1: (explain G2 [presentation: min:8] [distribution: test])
│ └── [presentation: info:10] [distribution: test]
│ ├── best: (explain G2="[presentation: min:8] [distribution: test]" [presentation: min:8] [distribution: test])
Expand Down
35 changes: 35 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/update
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,41 @@ vectorized: true
spans: /1-/1000 /2001-/3000
locking strength: for update

# AVOID_FULL_SCAN also works to ensure a constrained scan.
query T
EXPLAIN (VERBOSE) UPDATE xyz@{AVOID_FULL_SCAN} SET x = 5 WHERE (y > 0 AND y < 1000) OR (y > 2000 AND y < 3000)
----
distribution: local
vectorized: true
·
• update
│ columns: ()
│ estimated row count: 0 (missing stats)
│ table: xyz
│ set: x
│ auto commit
└── • render
│ columns: (x, y, z, x_new)
│ render x_new: 5
│ render x: x
│ render y: y
│ render z: z
└── • index join
│ columns: (x, y, z)
│ estimated row count: 990 (missing stats)
│ table: xyz@xyz_pkey
│ key columns: x
│ locking strength: for update
└── • scan
columns: (x, y)
estimated row count: 990 (missing stats)
table: xyz@y_idx
spans: /1-/1000 /2001-/3000
locking strength: for update

statement ok
CREATE TABLE pks (
k1 INT,
Expand Down
106 changes: 106 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/upsert
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,112 @@ vectorized: true
query error could not produce a query plan conforming to the NO_FULL_SCAN hint
EXPLAIN (VERBOSE) UPSERT INTO indexed@{FORCE_INDEX=secondary,NO_FULL_SCAN} VALUES (1)

# AVOID_FULL_SCAN also works to ensure a constrained scan, and does not error if
# one is not possible.
query T
EXPLAIN (VERBOSE) UPSERT INTO indexed@{AVOID_FULL_SCAN} VALUES (1)
----
distribution: local
vectorized: true
·
• upsert
│ columns: ()
│ estimated row count: 0 (missing stats)
│ into: indexed(a, b, c, d)
│ auto commit
│ arbiter indexes: indexed_pkey
└── • project
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d, b_default, c_default, d_comp, a, check1)
└── • render
│ columns: (check1, column1, b_default, c_default, d_comp, a, b, c, d)
│ render check1: c_default > 0
│ render column1: column1
│ render b_default: b_default
│ render c_default: c_default
│ render d_comp: d_comp
│ render a: a
│ render b: b
│ render c: c
│ render d: d
└── • cross join (left outer)
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d)
│ estimated row count: 1 (missing stats)
├── • values
│ columns: (column1, b_default, c_default, d_comp)
│ size: 4 columns, 1 row
│ row 0, expr 0: 1
│ row 0, expr 1: CAST(NULL AS INT8)
│ row 0, expr 2: 10
│ row 0, expr 3: 11
└── • scan
columns: (a, b, c, d)
estimated row count: 1 (missing stats)
table: indexed@indexed_pkey
spans: /1/0
locking strength: for update

query T
EXPLAIN (VERBOSE) UPSERT INTO indexed@{FORCE_INDEX=secondary,AVOID_FULL_SCAN} VALUES (1)
----
distribution: local
vectorized: true
·
• upsert
│ columns: ()
│ estimated row count: 0 (missing stats)
│ into: indexed(a, b, c, d)
│ auto commit
│ arbiter indexes: indexed_pkey
└── • project
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d, b_default, c_default, d_comp, a, check1)
└── • render
│ columns: (check1, column1, b_default, c_default, d_comp, a, b, c, d)
│ render check1: c_default > 0
│ render column1: column1
│ render b_default: b_default
│ render c_default: c_default
│ render d_comp: d_comp
│ render a: a
│ render b: b
│ render c: c
│ render d: d
└── • cross join (left outer)
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d)
│ estimated row count: 1 (missing stats)
├── • values
│ columns: (column1, b_default, c_default, d_comp)
│ size: 4 columns, 1 row
│ row 0, expr 0: 1
│ row 0, expr 1: CAST(NULL AS INT8)
│ row 0, expr 2: 10
│ row 0, expr 3: 11
└── • filter
│ columns: (a, b, c, d)
│ estimated row count: 1 (missing stats)
│ filter: a = 1
└── • index join
│ columns: (a, b, c, d)
│ estimated row count: 1,000 (missing stats)
│ table: indexed@indexed_pkey
│ key columns: a
└── • scan
columns: (a, b, d)
estimated row count: 1,000 (missing stats)
table: indexed@secondary
spans: FULL SCAN

query T
EXPLAIN (VERBOSE)
INSERT INTO indexed@indexed_pkey AS indexed_pk
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,9 @@ type ScanFlags struct {
// NoFullScan disallows use of a full scan for scanning this table.
NoFullScan bool

// AvoidFullScan avoids use of a full scan for scanning this table.
AvoidFullScan bool

// ForceIndex forces the use of a specific index (specified in Index).
// ForceIndex and NoIndexJoin cannot both be set at the same time.
ForceIndex bool
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/expr_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@ func (f *ExprFmtCtx) formatRelational(e RelExpr, tp treeprinter.Node) {
if private.Flags.NoFullScan {
b.WriteString(" no-full-scan")
}
if private.Flags.AvoidFullScan {
b.WriteString(" avoid-full-scan")
}
if private.Flags.ForceZigzag {
if private.Flags.ZigzagIndexes.Empty() {
b.WriteString(" force-zigzag")
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/memo/interner.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ func (h *hasher) HashScanFlags(val ScanFlags) {
h.HashBool(val.NoIndexJoin)
h.HashBool(val.NoZigzagJoin)
h.HashBool(val.NoFullScan)
h.HashBool(val.AvoidFullScan)
h.HashBool(val.ForceIndex)
h.HashBool(val.ForceInvertedIndex)
h.HashBool(val.ForceZigzag)
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/opt/memo/interner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ func TestInterner(t *testing.T) {
{hashFn: in.hasher.HashScanFlags, eqFn: in.hasher.IsScanFlagsEqual, variations: []testVariation{
// Use unnamed fields so that compilation fails if a new field is
// added to ScanFlags.
{val1: ScanFlags{false, false, false, false, false, false, false, 0, 0, intsets.Fast{}}, val2: ScanFlags{}, equal: true},
{val1: ScanFlags{false, false, false, false, false, false, false, false, 0, 0, intsets.Fast{}}, val2: ScanFlags{}, equal: true},
{val1: ScanFlags{}, val2: ScanFlags{}, equal: true},
{val1: ScanFlags{NoIndexJoin: false}, val2: ScanFlags{NoIndexJoin: true}, equal: false},
{val1: ScanFlags{NoIndexJoin: true}, val2: ScanFlags{NoIndexJoin: true}, equal: true},
Expand All @@ -417,6 +417,8 @@ func TestInterner(t *testing.T) {
{val1: ScanFlags{NoIndexJoin: true, Index: 1}, val2: ScanFlags{NoIndexJoin: false, Index: 1}, equal: false},
{val1: ScanFlags{NoFullScan: true}, val2: ScanFlags{NoFullScan: false}, equal: false},
{val1: ScanFlags{NoFullScan: true}, val2: ScanFlags{NoFullScan: true}, equal: true},
{val1: ScanFlags{AvoidFullScan: true}, val2: ScanFlags{AvoidFullScan: false}, equal: false},
{val1: ScanFlags{AvoidFullScan: true}, val2: ScanFlags{AvoidFullScan: true}, equal: true},
{val1: ScanFlags{ForceInvertedIndex: true}, val2: ScanFlags{ForceInvertedIndex: false}, equal: false},
{val1: ScanFlags{ForceInvertedIndex: true}, val2: ScanFlags{ForceInvertedIndex: true}, equal: true},
{
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/optbuilder/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,7 @@ func (b *Builder) buildScan(
private.Flags.NoIndexJoin = indexFlags.NoIndexJoin
private.Flags.NoZigzagJoin = indexFlags.NoZigzagJoin
private.Flags.NoFullScan = indexFlags.NoFullScan
private.Flags.AvoidFullScan = indexFlags.AvoidFullScan
private.Flags.ForceInvertedIndex = indexFlags.ForceInvertedIndex
if indexFlags.Index != "" || indexFlags.IndexID != 0 {
idx := -1
Expand Down
27 changes: 19 additions & 8 deletions pkg/sql/opt/xform/coster.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,12 @@ const (
// plan that could satisfy the hints.
hugeCost memo.Cost = 1e100

// avoidFullScanCostPenalty is the cost to use for full scans when we want to
// avoid them at all costs, but not error in case no other plan is possible.
// We can't use hugeCost directly since it might interfere with other hints
// like forcing a particular index.
avoidFullScanCostPenalty = hugeCost / 10

// fullScanRowCountPenalty adds a penalty to full table scans. This is especially
// useful for empty or very small tables, where we would get plans that are
// surprising to users (like full scans instead of point lookups).
Expand Down Expand Up @@ -758,14 +764,14 @@ func (c *coster) computeScanCost(scan *memo.ScanExpr, required *physical.Require
}

isUnfiltered := scan.IsUnfiltered(c.mem.Metadata())
if scan.Flags.NoFullScan {
// Normally a full scan of a partial index would be allowed with the
// NO_FULL_SCAN hint (isUnfiltered is false for partial indexes), but if the
// user has explicitly forced the partial index *and* used NO_FULL_SCAN, we
// disallow the full index scan.
if isUnfiltered || (scan.Flags.ForceIndex && scan.IsFullIndexScan()) {
return hugeCost
}

// Normally a full scan of a partial index would not be considered a "full
// scan" for the purposes of the NO_FULL_SCAN and AVOID_FULL_SCAN hints
// (isUnfiltered is false for partial indexes), but if the user has explicitly
// forced the partial index, we do consider it a full scan.
isFullScan := isUnfiltered || (scan.Flags.ForceIndex && scan.IsFullIndexScan())
if scan.Flags.NoFullScan && isFullScan {
return hugeCost
}

if scan.Flags.ForceInvertedIndex && !scan.IsInvertedScan(c.mem.Metadata()) {
Expand Down Expand Up @@ -803,6 +809,11 @@ func (c *coster) computeScanCost(scan *memo.ScanExpr, required *physical.Require
}
baseCost := memo.Cost(numSpans * randIOCostFactor)

// Add a penalty for a full scan if needed.
if scan.Flags.AvoidFullScan && isFullScan {
baseCost += avoidFullScanCostPenalty
}

// If this is a virtual scan, add the cost of fetching table descriptors.
if c.mem.Metadata().Table(scan.Table).IsVirtualTable() {
baseCost += virtualScanTableDescriptorFetchCost
Expand Down
Loading

0 comments on commit 48274d6

Please sign in to comment.