From 1638c4ebcf2be564427b067473685f8b72e8b85e Mon Sep 17 00:00:00 2001
From: Yahor Yuzefovich
Date: Thu, 23 May 2024 15:55:20 -0700
Subject: [PATCH] sql: adjust memory limits in COPY a bit

When deciding how many rows can be processed by COPY at once (meaning
that a single KV BatchRequest will handle all of them), we use a
fraction of the raft command size. This is needed so that we can
safely perform the necessary writes without getting a "command is too
large" error. Previously, we would always use one third of the command
size for COPY. However, this can be problematic when the table has
multiple indexes, since each input row then gets a separate InitPut
command for each secondary index. As a result, the current fraction
could be too optimistic: we've seen the COPY roachtest occasionally
fail with "command is too large".

This commit attempts to alleviate this problem by making the fraction
depend on the number of indexes: 1/3 remains the upper bound, used
when there are no secondary indexes, each secondary index deducts
1.5%, and the lower bound of 0.1 is reached at about 16 secondary
indexes. Note that I still don't know why the roachtest failure is
non-deterministic - perhaps it's due to hitting the intent tracking
limit for a txn.

Additionally, this commit introduces a cluster setting that controls
this fraction; the default value of 0 means the adaptive strategy
described above is used.

Release note: None
---
 pkg/sql/copy_from.go | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/pkg/sql/copy_from.go b/pkg/sql/copy_from.go
index d4c2193ef7ba..c10dbf7570eb 100644
--- a/pkg/sql/copy_from.go
+++ b/pkg/sql/copy_from.go
@@ -25,6 +25,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/col/coldataext"
 	"github.com/cockroachdb/cockroach/pkg/kv"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
+	"github.com/cockroachdb/cockroach/pkg/settings"
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog"
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/resolver"
@@ -363,7 +364,25 @@ func newCopyMachine(
 	// exceed this due to large dynamic values we will bail early and
 	// insert the rows we have so far. Note once the coldata.Batch is full
 	// we still have all the encoder allocations to make.
-	c.maxRowMem = kvserverbase.MaxCommandSize.Get(c.p.execCfg.SV()) / 3
+	//
+	// We also make the fraction depend on the number of indexes in the table
+	// since each secondary index will require a separate InitPut command for
+	// each input row. We want to pick the fraction to be in [0.1, 0.33] range
+	// so that 0.33 is used with no secondary indexes and 0.1 is used with 16 or
+	// more secondary indexes.
+	maxCommandFraction := copyMaxCommandSizeFraction.Get(c.p.execCfg.SV())
+	if maxCommandFraction == 0 {
+		maxCommandFraction = 1.0 / 3.0
+		if numIndexes := len(tableDesc.AllIndexes()); numIndexes > 1 {
+			// Each additional secondary index is "penalized" by reducing the
+			// fraction by 1.5%, until 0.1 which is the lower bound.
+			maxCommandFraction -= 0.015 * float64(numIndexes-1)
+			if maxCommandFraction < 0.1 {
+				maxCommandFraction = 0.1
+			}
+		}
+	}
+	c.maxRowMem = int64(float64(kvserverbase.MaxCommandSize.Get(c.p.execCfg.SV())) * maxCommandFraction)
 
 	if c.canSupportVectorized(tableDesc) {
 		if err := c.initVectorizedCopy(ctx, typs); err != nil {
@@ -378,6 +397,16 @@ func newCopyMachine(
 	return c, nil
 }
 
+var copyMaxCommandSizeFraction = settings.RegisterFloatSetting(
+	settings.ApplicationLevel,
+	"sql.copy.max_command_size_fraction",
+	"determines the fraction of kv.raft.command.max_size that is used when "+
+		"sizing batches of rows when processing COPY commands. Use 0 for default "+
+		"adaptive strategy that considers number of secondary indexes",
+	0.0,
+	settings.Fraction,
+)
+
 func (c *copyMachine) canSupportVectorized(table catalog.TableDescriptor) bool {
 	// TODO(cucaroach): support vectorized binary.
 	if c.format == tree.CopyFormatBinary {
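
A minimal standalone sketch of the adaptive fraction described in the commit
message, for illustration only (the adaptiveFraction helper and the 64 MiB
value assumed for kv.raft.command.max_size are not part of the patch):

package main

import "fmt"

// adaptiveFraction mirrors the strategy from the commit message: start at
// 1/3, deduct 1.5% per secondary index, and clamp at the 0.1 lower bound.
// numIndexes counts all indexes, including the primary one.
func adaptiveFraction(numIndexes int) float64 {
	fraction := 1.0 / 3.0
	if numIndexes > 1 {
		fraction -= 0.015 * float64(numIndexes-1)
		if fraction < 0.1 {
			fraction = 0.1
		}
	}
	return fraction
}

func main() {
	// Assume the 64 MiB default for kv.raft.command.max_size.
	const maxCommandSize = 64 << 20
	for _, numIndexes := range []int{1, 2, 8, 17} {
		fraction := adaptiveFraction(numIndexes)
		maxRowMem := int64(float64(maxCommandSize) * fraction)
		fmt.Printf("indexes=%d fraction=%.3f maxRowMem=%d bytes\n",
			numIndexes, fraction, maxRowMem)
	}
}

With 17 total indexes (16 secondary), the computed value dips below 0.1 and is
clamped to the lower bound, matching the "about 16 secondary indexes" figure
in the commit message.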