Skip to content

Commit

Permalink
roachtest: increase the token return time with disk bandwidth limit
Browse files Browse the repository at this point in the history
Previously the perturbation/* tests would wait 10m for tokens to be
returned. Without the disk bandwidth limit set, they typically return
almost immediately but with a limit they can take ~30m to return in some
cases even after the workload is stopped and the system is idle. This
change fixes some of the perturbation/metamorphic/* tests that are
hitting this slow token return. Additionally this change reduces the
token wait time for the test
admission-control/elastic-workload/mixed-version to 1 minute as this
test doesn't typically wait more then a few seconds for token return.

Epic: none
Fixes: #136982
Fixes: #136553
Informs: #137017

Release note: None
  • Loading branch information
andrewbaptist committed Dec 10, 2024
1 parent 4a8eb1d commit 87b463a
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
10 changes: 7 additions & 3 deletions pkg/cmd/roachtest/roachtestutil/validation_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ func CheckInvalidDescriptors(ctx context.Context, db *gosql.DB) error {
// validateTokensReturned ensures that all RACv2 tokens are returned to the pool
// at the end of the test.
func ValidateTokensReturned(
ctx context.Context, t test.Test, c cluster.Cluster, nodes option.NodeListOption,
ctx context.Context,
t test.Test,
c cluster.Cluster,
nodes option.NodeListOption,
waitTime time.Duration,
) {
t.L().Printf("validating all tokens returned")
for _, node := range nodes {
Expand Down Expand Up @@ -163,10 +167,10 @@ func ValidateTokensReturned(
}
}
return nil
// We wait up to 10 minutes for the tokens to be returned. In tests which
// We wait up to waitTime for the tokens to be returned. In tests which
// purposefully create a send queue towards a node, the queue may take a
// while to drain. The tokens will not be returned until the queue is
// empty and there are no inflight requests.
}, 10*time.Minute)
}, waitTime)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func registerElasticWorkloadMixedVersion(r registry.Registry) {
mvt.Run()
// TODO(pav-kv): also validate that the write throughput was kept under
// control, and the foreground traffic was not starved.
roachtestutil.ValidateTokensReturned(ctx, t, c, c.CRDBNodes())
roachtestutil.ValidateTokensReturned(ctx, t, c, c.CRDBNodes(), time.Minute)
},
})
}
9 changes: 8 additions & 1 deletion pkg/cmd/roachtest/tests/perturbation/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,14 @@ func (v variations) runTest(ctx context.Context, t test.Test, c cluster.Cluster)
t.L().Printf("validating stats after the perturbation")
failures = append(failures, isAcceptableChange(t.L(), baselineStats, afterStats, v.acceptableChange)...)
require.True(t, len(failures) == 0, strings.Join(failures, "\n"))
roachtestutil.ValidateTokensReturned(ctx, t, v, v.stableNodes())
// TODO(baptist): Look at the time for token return in actual tests to
// determine if this can be lowered further.
tokenReturnTime := 10 * time.Minute
// TODO(#137017): Increase the return time if disk bandwidth limit is set.
if v.diskBandwidthLimit != "0" {
tokenReturnTime = 1 * time.Hour
}
roachtestutil.ValidateTokensReturned(ctx, t, v, v.stableNodes(), tokenReturnTime)
}

func (v variations) applyClusterSettings(ctx context.Context, t test.Test) {
Expand Down

0 comments on commit 87b463a

Please sign in to comment.