Skip to content

Commit

Permalink
sql: rerun execbuilder for gist matching
Browse files Browse the repository at this point in the history
This commit reruns the execbuilder with the explain factory whenever
plan-gist matching happens. It now occurs right after we created the
optimizer plan, and this commit extracts a small helper to run the
execbuild using different exec factories. As a result, `plan.txt` will
be fully populated in the bundle.

Release note (bug fix): Previously, whenever CockroachDB collected
a statement bundle when plan-gist-based matching was used, `plan.txt`
file would be incomplete, and this is now fixed. The bug has been
present since introduction of plan-gist-based matching feature in 23.1
but was partially addressed in 24.2 release.
  • Loading branch information
yuzefovich committed Dec 9, 2024
1 parent a247ae5 commit 27b9046
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 114 deletions.
33 changes: 16 additions & 17 deletions pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -1830,7 +1830,7 @@ func (ex *connExecutor) dispatchToExecutionEngine(

if ppInfo := getPausablePortalInfo(); ppInfo != nil {
if !ppInfo.dispatchToExecutionEngine.cleanup.isComplete {
err = ex.makeExecPlan(ctx, planner)
ctx, err = ex.makeExecPlan(ctx, planner)
if flags := planner.curPlan.flags; err == nil && (flags.IsSet(planFlagContainsMutation) || flags.IsSet(planFlagIsDDL)) {
telemetry.Inc(sqltelemetry.NotReadOnlyStmtsTriedWithPausablePortals)
// We don't allow mutations in a pausable portal. Set it back to
Expand All @@ -1851,21 +1851,12 @@ func (ex *connExecutor) dispatchToExecutionEngine(
} else {
// Prepare the plan. Note, the error is processed below. Everything
// between here and there needs to happen even if there's an error.
err = ex.makeExecPlan(ctx, planner)
ctx, err = ex.makeExecPlan(ctx, planner)
defer planner.curPlan.close(ctx)
}

// Include gist in error reports.
planGist := planner.instrumentation.planGist.String()
ctx = withPlanGist(ctx, planGist)
if ppInfo := getPausablePortalInfo(); ppInfo == nil || !ppInfo.dispatchToExecutionEngine.cleanup.isComplete {
// If we're not using pausable portals, or it's the first execution of
// the pausable portal, and we're not collecting a bundle yet, check
// whether we should get a bundle for this particular plan gist.
if ih := &planner.instrumentation; !ih.collectBundle && ih.outputMode == unmodifiedOutput {
ctx = ih.setupWithPlanGist(ctx, ex.server.cfg, stmt.StmtNoConstants, planGist, &planner.curPlan)
}
}
ctx = withPlanGist(ctx, planner.instrumentation.planGist.String())

if planner.extendedEvalCtx.TxnImplicit {
planner.curPlan.flags.Set(planFlagImplicitTxn)
Expand Down Expand Up @@ -2297,14 +2288,16 @@ var txnSchemaChangeErr = pgerror.Newf(
// makeExecPlan creates an execution plan and populates planner.curPlan using
// the cost-based optimizer. This is used to create the plan when executing a
// query in the "simple" pgwire protocol.
func (ex *connExecutor) makeExecPlan(ctx context.Context, planner *planner) error {
func (ex *connExecutor) makeExecPlan(
ctx context.Context, planner *planner,
) (context.Context, error) {
if err := ex.maybeUpgradeToSerializable(ctx, planner.stmt); err != nil {
return err
return ctx, err
}

if err := planner.makeOptimizerPlan(ctx); err != nil {
log.VEventf(ctx, 1, "optimizer plan failed: %v", err)
return err
return ctx, err
}

flags := planner.curPlan.flags
Expand All @@ -2321,7 +2314,7 @@ func (ex *connExecutor) makeExecPlan(ctx context.Context, planner *planner) erro
// - the scan is considered large.
// - the query is not an internal query.
ex.metrics.EngineMetrics.FullTableOrIndexScanRejectedCount.Inc(1)
return errors.WithHint(
return ctx, errors.WithHint(
pgerror.Newf(pgcode.TooManyRows,
"query `%s` contains a full table/index scan which is explicitly disallowed",
planner.stmt.SQL),
Expand All @@ -2343,7 +2336,13 @@ func (ex *connExecutor) makeExecPlan(ctx context.Context, planner *planner) erro
ctx, ex.server.idxRecommendationsCache, planner, ex.executorType == executorTypeInternal,
)

return nil
// Now that we have the plan gist, check whether we should get a bundle for
// it.
if ih := &planner.instrumentation; !ih.collectBundle && ih.outputMode == unmodifiedOutput {
ctx = ih.setupWithPlanGist(ctx, planner, ex.server.cfg)
}

return ctx, nil
}

// topLevelQueryStats returns some basic statistics about the run of the query.
Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/distsql_plan_changefeed.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,10 @@ func PlanCDCExpression(
}

const allowAutoCommit = false
const disableTelemetryAndPlanGists = false
if err := opc.runExecBuilder(
ctx, &p.curPlan, &p.stmt, newExecFactory(ctx, p), memo, p.SemaCtx(), p.EvalContext(), allowAutoCommit,
ctx, &p.curPlan, &p.stmt, newExecFactory(ctx, p), memo, p.SemaCtx(),
p.EvalContext(), allowAutoCommit, disableTelemetryAndPlanGists,
); err != nil {
return cdcPlan, err
}
Expand Down
29 changes: 15 additions & 14 deletions pkg/sql/explain_bundle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ CREATE TABLE users(id UUID DEFAULT gen_random_uuid() PRIMARY KEY, promo_id INT R

t.Run("plan-gist matching", func(t *testing.T) {
r.Exec(t, "CREATE TABLE gist (k INT PRIMARY KEY);")
r.Exec(t, "INSERT INTO gist SELECT generate_series(1, 10)")
const fprint = `SELECT * FROM gist`

// Come up with a target gist.
Expand All @@ -580,20 +581,20 @@ CREATE TABLE users(id UUID DEFAULT gen_random_uuid() PRIMARY KEY, promo_id INT R
if name != "plan.txt" {
return nil
}
// Add a new line at the beginning for cleaner formatting in the
// test.
contents = "\n" + contents
// The gist appears to be somewhat non-deterministic (but its
// decoding stays the same), so we populate the expected
// contents based on the particular gist.
expected := fmt.Sprintf(`
-- plan is incomplete due to gist matching: %s
• scan
table: gist@gist_pkey
spans: FULL SCAN`, gist)
if contents != expected {
return errors.Newf("unexpected contents of plan.txn\nexpected:\n%s\ngot:\n%s", expected, contents)
// We don't hard-code the full expected output here so that it
// doesn't need an update every time we change EXPLAIN ANALYZE
// output format. Instead, we only assert that a few lines are
// present in the output.
for _, expectedLine := range []string{
"• scan",
" sql nodes: n1",
" actual row count: 10",
" table: gist@gist_pkey",
" spans: FULL SCAN",
} {
if !strings.Contains(contents, expectedLine) {
return errors.Newf("didn't find %q in the output: %v", expectedLine, contents)
}
}
return nil
}, false, /* expectErrors */
Expand Down
149 changes: 69 additions & 80 deletions pkg/sql/instrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"bytes"
"context"
"fmt"
"strings"
"time"

"github.com/cockroachdb/cockroach/pkg/keys"
Expand Down Expand Up @@ -92,12 +91,6 @@ type instrumentationHelper struct {
// statement; it triggers saving of extra information like the plan string.
collectBundle bool

// planGistMatchingBundle is set when the bundle collection was enabled for
// a request with plan-gist matching enabled. In particular, such a bundle
// will be somewhat incomplete (it'll miss the plan string as well as the
// trace will miss all the events that happened in the optimizer).
planGistMatchingBundle bool

// collectExecStats is set when we are collecting execution statistics for a
// statement.
collectExecStats bool
Expand Down Expand Up @@ -534,46 +527,77 @@ func (ih *instrumentationHelper) Setup(
// provided fingerprint and plan gist. It assumes that the bundle is not
// currently being collected.
func (ih *instrumentationHelper) setupWithPlanGist(
ctx context.Context, cfg *ExecutorConfig, fingerprint, planGist string, plan *planTop,
ctx context.Context, p *planner, cfg *ExecutorConfig,
) context.Context {
planGist := ih.planGist.String()
ih.collectBundle, ih.diagRequestID, ih.diagRequest =
ih.stmtDiagnosticsRecorder.ShouldCollectDiagnostics(ctx, fingerprint, planGist)
// IsRedacted will be false when ih.collectBundle is false.
ih.stmtDiagnosticsRecorder.ShouldCollectDiagnostics(ctx, p.stmt.StmtNoConstants, planGist)
if !ih.collectBundle {
// We won't need the memo and the catalog, so free it up.
p.curPlan.mem = nil
p.curPlan.catalog = nil
return ctx
}
ih.explainFlags.RedactValues = ih.explainFlags.RedactValues || ih.diagRequest.IsRedacted()
if ih.collectBundle {
ih.needFinish = true
ih.collectExecStats = true
ih.planGistMatchingBundle = true
if ih.sp == nil || !ih.sp.IsVerbose() {
// We will create a verbose span
// - if we don't have a span yet, or
// - we do have a span, but it's not verbose.
//
// ih.sp can be non-nil and non-verbose when it was created in Setup
// because the stmt got sampled (i.e. ih.collectExecStats was true).
// (Note that it couldn't have been EXPLAIN ANALYZE code path in
// Setup because it uses a different output mode.) In any case,
// we're responsible for finishing this span, so we reassign it to
// ih.parentSp to keep track of.
//
// Note that we don't need to explicitly use ih.sp when creating a
// child span because it's implicitly stored in ctx.
if ih.sp != nil {
ih.parentSp = ih.sp
}
ctx, ih.sp = tracing.EnsureChildSpan(
ctx, cfg.AmbientCtx.Tracer, "plan-gist bundle",
tracing.WithRecording(tracingpb.RecordingVerbose),
)
ih.shouldFinishSpan = true
ih.finalizeSetup(ctx, cfg)
log.VEventf(ctx, 1, "plan-gist matching bundle collection began after the optimizer finished its part")
ih.needFinish = true
ih.collectExecStats = true
if ih.sp == nil || !ih.sp.IsVerbose() {
// We will create a verbose span
// - if we don't have a span yet, or
// - we do have a span, but it's not verbose.
//
// ih.sp can be non-nil and non-verbose when it was created in Setup
// because the stmt got sampled (i.e. ih.collectExecStats was true).
// (Note that it couldn't have been EXPLAIN ANALYZE code path in Setup
// because it uses a different output mode.) In any case, we're
// responsible for finishing this span, so we reassign it to ih.parentSp
// to keep track of.
//
// Note that we don't need to explicitly use ih.sp when creating a child
// span because it's implicitly stored in ctx.
if ih.sp != nil {
ih.parentSp = ih.sp
}
ctx, ih.sp = tracing.EnsureChildSpan(
ctx, cfg.AmbientCtx.Tracer, "plan-gist bundle",
tracing.WithRecording(tracingpb.RecordingVerbose),
)
ih.shouldFinishSpan = true
ih.finalizeSetup(ctx, cfg)
}
log.VEventf(ctx, 1, "plan-gist matching bundle collection began after the optimizer finished its part")
if cfg.TestingKnobs.DeterministicExplain {
ih.explainFlags.Deflake = explain.DeflakeAll
}
// Since we haven't enabled the bundle collection before the optimization,
// explain plan wasn't populated. We'll rerun the execbuilder with the
// explain factory to get that (the explain factory will be used because we
// now have collectBundle set to true).
//
// Disable telemetry in order to not double count things since we've already
// built the plan once.
const disableTelemetryAndPlanGists = true
// Note that we don't reset the optPlanningCtx because it was already reset
// and set up when we created the original optimizer plan.
// TODO: do we need to do this? Looks like we always do this when performing
// optbuilder + execbuild, but here we only do the latter.
origPlanComponents := p.curPlan.planComponents
err := p.runExecBuild(ctx, p.curPlan.mem, disableTelemetryAndPlanGists)
if err != nil {
// This seems unexpected, but let's proceed with the original plan.
if buildutil.CrdbTestBuild {
panic(errors.NewAssertionErrorWithWrappedErrf(err, "unexpectedly got an error when rerun execbuild due to plan-gist match"))
} else {
log.VEventf(ctx, 1, "hit an error when using explain factory: %v", err)
p.curPlan.planComponents = origPlanComponents
}
} else {
// We won't need the memo and the catalog, so free it up.
plan.mem = nil
plan.catalog = nil
// We need to close the original plan since we're going to overwrite it.
// Note that the new plan will be closed correctly by the defer in
// dispatchToExecutionEngine.
origPlanComponents.close(ctx)
}

return ctx
}

Expand Down Expand Up @@ -675,45 +699,10 @@ func (ih *instrumentationHelper) Finish(
}
}
planString := ob.BuildString()
if ih.planGistMatchingBundle {
// We don't have the plan string available since the stmt bundle
// collection was enabled _after_ the optimizer was done.
// Instead, we do have the gist available, so we'll decode it
// and use that as the plan string.
var sb strings.Builder
sb.WriteString("-- plan is incomplete due to gist matching: ")
sb.WriteString(ih.planGist.String())
// Perform best-effort decoding ignoring all errors.
if it, err := ie.QueryIterator(
bundleCtx, "plan-gist-decoding" /* opName */, nil, /* txn */
fmt.Sprintf("SELECT * FROM crdb_internal.decode_plan_gist('%s')", ih.planGist.String()),
); err == nil {
defer func() {
_ = it.Close()
}()
sb.WriteString("\n")
// Ignore the errors returned on Next call.
for ok, _ = it.Next(bundleCtx); ok; ok, _ = it.Next(bundleCtx) {
row := it.Cur()
var line string
// Be conservative in case the output format changes.
if len(row) == 1 {
var ds tree.DString
ds, ok = tree.AsDString(row[0])
line = string(ds)
} else {
ok = false
}
if !ok && buildutil.CrdbTestBuild {
return errors.AssertionFailedf("unexpected output format for decoding plan gist %s", ih.planGist.String())
}
if ok {
sb.WriteString("\n")
sb.WriteString(line)
}
}
}
planString = sb.String()
if planString == "" {
// This should only happen with plan-gist matching where we hit
// an error when using the explain factory.
planString = "-- plan is missing, probably hit an error with gist matching: " + ih.planGist.String()
}
bundle = buildStatementBundle(
bundleCtx, ih.explainFlags, cfg.DB, p, ie.(*InternalExecutor),
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/opt/exec/execbuilder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,11 @@ func New(
return b
}

// DisableTelemetry disables telemetry on this Builder.
func (b *Builder) DisableTelemetry() {
b.disableTelemetry = true
}

// Build constructs the execution node tree and returns its root node if no
// error occurred.
func (b *Builder) Build() (_ exec.Plan, err error) {
Expand Down
Loading

0 comments on commit 27b9046

Please sign in to comment.