From 373df43770d496ae10c7ad00f40008ce3f55294b Mon Sep 17 00:00:00 2001
From: Lucian Carata
Date: Thu, 12 Dec 2024 01:09:30 +0000
Subject: [PATCH] feat(k6): add scenario with multiple stages ramping up/down RPS (#6031)

The added load test scenario allows one to configure an arbitrary number of
stages, with each consisting of a linear ramp-up/down to the desired requests
per second and a hold/plateau time.

Within each stage, the duration for which the inference RPS is held constant
is configured via one element in the `CONSTANT_RATE_DURATIONS_SECONDS`
environment variable (a vector of comma separated values), with the ramp-up/
down duration preceding it being 1/3rd of the hold time.
---
Review notes (this section is ignored by `git am` and is not part of the
commit message):

- constantRateDurationsSeconds(): when CONSTANT_RATE_DURATIONS_SECONDS has
  fewer entries than REQUEST_RATES, the list is padded with its last value
  and that padded list is now returned. Previously the padding branch fell
  through to the evenly-split default and the configured durations were
  discarded.
- parseInt() calls on added lines now pass an explicit radix of 10.
- Leading whitespace was restored by hand; check the indentation of context
  lines against the target tree before applying.

 tests/k6/components/settings.js            | 47 ++++++++++++-
 tests/k6/configs/k8s/base/k6.yaml          | 31 +++++++++
 tests/k6/scenarios/infer_multiple_rates.js | 79 ++++++++++++++++++++++
 3 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 tests/k6/scenarios/infer_multiple_rates.js

diff --git a/tests/k6/components/settings.js b/tests/k6/components/settings.js
index 503efcc2c1..e9a214cdbb 100644
--- a/tests/k6/components/settings.js
+++ b/tests/k6/components/settings.js
@@ -238,11 +238,34 @@ function doWarmup() {
 
 function requestRate() {
     if (__ENV.REQUEST_RATE) {
-        return __ENV.REQUEST_RATE
+        return parseInt(__ENV.REQUEST_RATE, 10)
     }
     return 10
 }
 
+function requestRates() {
+    if (__ENV.REQUEST_RATES) {
+        return __ENV.REQUEST_RATES.split(",").map( s => parseInt(s, 10))
+    }
+    return [requestRate()]
+}
+
+function rateStages() {
+    if (__ENV.REQUEST_RATES) {
+        var stages = []
+        var durations = constantRateDurationsSeconds()
+        var rates = requestRates()
+        for (var i = 0; i < rates.length; i++) {
+            // ramp up (1/3 rd of the duration)
+            stages.push({target: rates[i], duration: Math.ceil(durations[i]/3).toString()+'s'})
+            // hold
+            stages.push({target: rates[i], duration: durations[i].toString()+'s'})
+        }
+        return stages
+    }
+    return [{target: requestRate(), duration: constantRateDurationSeconds().toString()+'s'}]
+}
+
 function constantRateDurationSeconds() {
     if (__ENV.CONSTANT_RATE_DURATION_SECONDS) {
         return __ENV.CONSTANT_RATE_DURATION_SECONDS
@@ -250,6 +273,25 @@ function constantRateDurationSeconds() {
     return 30
 }
 
+function constantRateDurationsSeconds() {
+    if (__ENV.CONSTANT_RATE_DURATIONS_SECONDS) {
+        var durations = __ENV.CONSTANT_RATE_DURATIONS_SECONDS.split(",").map( s => parseInt(s, 10))
+        if (durations.length > requestRates().length) {
+            return durations.slice(0, requestRates().length)
+        } else if (durations.length < requestRates().length) {
+            // pad with the last value
+            const last = durations[durations.length - 1]
+            for (var i = durations.length; i < requestRates().length; i++) {
+                durations.push(last)
+            }
+        }
+        // lengths now match: return the (possibly padded) durations
+        return durations
+    }
+    const reqNumberOfStages = requestRates().length
+    return new Array(reqNumberOfStages).fill(constantRateDurationSeconds()/reqNumberOfStages)
+}
+
 function podNamespace() {
     if (__ENV.NAMESPACE) {
         return __ENV.NAMESPACE
@@ -349,7 +391,10 @@ export function getConfig() {
         "inferType" : inferType(),
         "doWarmup": doWarmup(),
         "requestRate": requestRate(),
+        "requestRates": requestRates(),
         "constantRateDurationSeconds": constantRateDurationSeconds(),
+        "constantRateDurationsSeconds": constantRateDurationsSeconds(),
+        "rateStages": rateStages(),
         "modelReplicas": modelReplicas(),
         "maxModelReplicas": maxModelReplicas(),
         "namespace": podNamespace(),
diff --git a/tests/k6/configs/k8s/base/k6.yaml b/tests/k6/configs/k8s/base/k6.yaml
index 892c4435dd..a60a5a36b2 100644
--- a/tests/k6/configs/k8s/base/k6.yaml
+++ b/tests/k6/configs/k8s/base/k6.yaml
@@ -36,6 +36,15 @@ spec:
     #   "csv=results/base.gz",
     #   "scenarios/infer_constant_rate.js",
     # ]
+    # # infer_multiple_rates
+    # args: [
+    #   "--no-teardown",
+    #   "--summary-export",
+    #   "results/base.json",
+    #   "--out",
+    #   "csv=results/base.gz",
+    #   "scenarios/infer_multiple_rates.js",
+    # ]
     # # k8s-test-script
     # args: [
     #   "--summary-export",
@@ -61,8 +70,30 @@ spec:
     #   "scenarios/core2_qa_control_plane_ops.js",
     # ]
     env:
+    - name: REQUEST_RATE
+      value: "20"
+    # REQUEST_RATES allows you to specify multiple target RPS values, and is
+    # used in the infer_multiple_rates scenario. The duration over which
+    # each rate is maintained is controlled via the value at the same index
+    # in the CONSTANT_RATE_DURATIONS_SECONDS variable. Rather than a sudden
+    # change in RPS, additional stages lasting 1/3rd of the duration over
+    # which the RPS is held constant are automatically introduced, and
+    # perform either a linear ramp-up or ramp-down to the next target RPS.
+    # - name: REQUEST_RATES
+    #   value: "10,70,10,50,20"
+    - name: CONSTANT_RATE_DURATION_SECONDS
+      value: "60"
+    # CONSTANT_RATE_DURATIONS_SECONDS is used in the infer_multiple_rates
+    # scenario. It specifies the duration in seconds for which each target
+    # RPS value is maintained. If the sum of the values here is S, the total
+    # duration of the test will be S + S/3 (with the latter due to the added
+    # ramp-up/ramp-down stages)
+    # - name: CONSTANT_RATE_DURATIONS_SECONDS
+    #   value: "120,120,400,120,400"
     - name: USE_KUBE_CONTROL_PLANE
       value: "true"
+    # - name: SKIP_UNLOAD_MODEL
+    #   value: "true"
     - name: SCHEDULER_ENDPOINT
       value: "${SCHEDULER_ENDPOINT}:9004"
     - name: INFER_HTTP_ITERATIONS
diff --git a/tests/k6/scenarios/infer_multiple_rates.js b/tests/k6/scenarios/infer_multiple_rates.js
new file mode 100644
index 0000000000..d72b1e875e
--- /dev/null
+++ b/tests/k6/scenarios/infer_multiple_rates.js
@@ -0,0 +1,79 @@
+import { getConfig } from '../components/settings.js'
+import { doInfer, setupBase, teardownBase, getVersionSuffix, applyModelReplicaChange } from '../components/utils.js'
+import { vu } from 'k6/execution';
+
+export const options = {
+    thresholds: {
+        'http_req_duration{scenario:default}': [`max>=0`],
+        'http_reqs{scenario:default}': [],
+        'grpc_req_duration{scenario:default}': [`max>=0`],
+        'data_received{scenario:default}': [],
+        'data_sent{scenario:default}': [],
+    },
+    scenarios: {
+        ramping_request_rates: {
+            startTime: '0s',
+            executor: 'ramping-arrival-rate',
+            startRate: 5,
+            timeUnit: '1s',
+            preAllocatedVUs: 50, // how large the initial pool of VUs would be
+            maxVUs: 1000, // if the preAllocatedVUs are not enough, we can initialize more
+            stages: getConfig().rateStages,
+        },
+    },
+    setupTimeout: '6000s',
+    teardownTimeout: '6000s',
+};
+
+export function setup() {
+    const config = getConfig()
+
+    setupBase(config)
+    console.log("rate stages:", getConfig().rateStages)
+
+    return config
+}
+
+export default function (config) {
+    const numModelTypes = config.modelType.length
+
+    let candidateIdxs = []
+    for (let i = 0; i < numModelTypes; i++) {
+        if (config.maxNumModels[i] !== 0)
+            candidateIdxs.push(i)
+    }
+    const numCandidates = candidateIdxs.length
+    var idx = candidateIdxs[Math.floor(Math.random() * numCandidates)]
+
+    const modelId = Math.floor(Math.random() * config.maxNumModels[idx])
+    const modelName = config.modelNamePrefix[idx] + modelId.toString()
+
+    const modelNameWithVersion = modelName + getVersionSuffix(config.isSchedulerProxy)  // first version
+
+    var rest_enabled = Number(config.inferHttpIterations)
+    var grpc_enabled = Number(config.inferGrpcIterations)
+    if (rest_enabled && grpc_enabled) {
+        // if both protocols are enabled, choose one randomly
+        const rand = Math.random()
+        if (rand > 0.5) {
+            doInfer(modelName, modelNameWithVersion, config, true, idx) // rest
+        } else {
+            doInfer(modelName, modelNameWithVersion, config, false, idx) // grpc
+        }
+    } else if (rest_enabled) {
+        doInfer(modelName, modelNameWithVersion, config, true, idx)
+    } else if (grpc_enabled) {
+        doInfer(modelName, modelNameWithVersion, config, false, idx)
+    } else {
+        throw new Error('Both REST and GRPC protocols are disabled!')
+    }
+
+    // for simplicity we only change model replicas in the first VU
+    if (vu.idInTest == 1 && config.enableModelReplicaChange) {
+        applyModelReplicaChange(config)
+    }
+}
+
+export function teardown(config) {
+    teardownBase(config)
+}