Skip to content

Commit

Permalink
feat(k6): add scenario with multiple stages ramping up/down RPS (#6031)
Browse files Browse the repository at this point in the history
The added load test scenario allows one to configure an arbitrary number
of stages, with each consisting of a linear ramp-up/down to the desired
requests per second and a hold/plateau time.

Within each stage, the duration for which the inference RPS is held constant
is configured via one element in the `CONSTANT_RATE_DURATIONS_SECONDS`
environment variable (a vector of comma separated values), with the ramp-up/
down duration preceding it being 1/3rd of the hold time.
  • Loading branch information
lc525 authored Dec 12, 2024
1 parent 34cf313 commit 373df43
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 1 deletion.
47 changes: 46 additions & 1 deletion tests/k6/components/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,60 @@ function doWarmup() {

// Target request rate used by the load-test scenarios.
//
// Reads the REQUEST_RATE environment variable and returns it as an integer,
// so that callers always receive a number regardless of whether the value
// came from the environment (always a string) or from the default.
// Returns 10 when REQUEST_RATE is unset or empty.
function requestRate() {
  if (__ENV.REQUEST_RATE) {
    // explicit radix: the value is always meant to be a decimal integer
    return parseInt(__ENV.REQUEST_RATE, 10)
  }
  return 10
}

// List of target request rates, one per stage.
//
// When REQUEST_RATES is set it is treated as a comma separated list and each
// element is parsed as an integer. Otherwise a single-element list holding
// requestRate() is returned.
function requestRates() {
  const rawRates = __ENV.REQUEST_RATES
  if (!rawRates) {
    return [requestRate()]
  }
  const parsedRates = []
  for (const token of rawRates.split(",")) {
    parsedRates.push(parseInt(token))
  }
  return parsedRates
}

// Builds the list of {target, duration} stages for a ramping scenario.
//
// Without REQUEST_RATES: a single stage targeting requestRate() for
// constantRateDurationSeconds() seconds.
// With REQUEST_RATES: two stages per configured rate -- a ramp towards the
// rate lasting a third of the hold time (rounded up to a whole second),
// followed by a hold at that rate for the full hold time.
function rateStages() {
  if (!__ENV.REQUEST_RATES) {
    return [{target: requestRate(), duration: constantRateDurationSeconds().toString()+'s'}]
  }

  const holdSeconds = constantRateDurationsSeconds()
  const targets = requestRates()
  const plan = []
  for (const [stageIdx, target] of targets.entries()) {
    const hold = holdSeconds[stageIdx]
    // ramp towards the target (1/3rd of the hold duration)
    plan.push({target: target, duration: Math.ceil(hold/3).toString()+'s'})
    // hold at the target
    plan.push({target: target, duration: hold.toString()+'s'})
  }
  return plan
}

// Duration, in seconds, for which a constant request rate is held.
//
// Returns the CONSTANT_RATE_DURATION_SECONDS environment value unchanged
// (i.e. as a string) when it is set, and the number 30 otherwise.
function constantRateDurationSeconds() {
  const configured = __ENV.CONSTANT_RATE_DURATION_SECONDS
  return configured ? configured : 30
}

// Hold durations (in seconds), one per entry of requestRates().
//
// When CONSTANT_RATE_DURATIONS_SECONDS is set it is parsed as a comma
// separated list of integers and fitted to the number of request rates:
//   - extra values are dropped
//   - missing values are filled in by repeating the last provided value
// When it is not set, constantRateDurationSeconds() is split evenly across
// all stages.
//
// Returns an array with exactly requestRates().length elements.
function constantRateDurationsSeconds() {
  const numStages = requestRates().length

  if (__ENV.CONSTANT_RATE_DURATIONS_SECONDS) {
    const durations = __ENV.CONSTANT_RATE_DURATIONS_SECONDS
      .split(",")
      .map(s => parseInt(s, 10))

    if (durations.length >= numStages) {
      return durations.slice(0, numStages)
    }

    // pad with the last value; the padded list must be returned here,
    // otherwise the configured durations are silently replaced by the
    // evenly-split default below
    const last = durations[durations.length - 1]
    while (durations.length < numStages) {
      durations.push(last)
    }
    return durations
  }

  return new Array(numStages).fill(constantRateDurationSeconds() / numStages)
}

function podNamespace() {
if (__ENV.NAMESPACE) {
return __ENV.NAMESPACE
Expand Down Expand Up @@ -349,7 +391,10 @@ export function getConfig() {
"inferType" : inferType(),
"doWarmup": doWarmup(),
"requestRate": requestRate(),
"requestRates": requestRates(),
"constantRateDurationSeconds": constantRateDurationSeconds(),
"constantRateDurationsSeconds": constantRateDurationsSeconds(),
"rateStages": rateStages(),
"modelReplicas": modelReplicas(),
"maxModelReplicas": maxModelReplicas(),
"namespace": podNamespace(),
Expand Down
31 changes: 31 additions & 0 deletions tests/k6/configs/k8s/base/k6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ spec:
# "csv=results/base.gz",
# "scenarios/infer_constant_rate.js",
# ]
# # infer_multiple_rates
# args: [
# "--no-teardown",
# "--summary-export",
# "results/base.json",
# "--out",
# "csv=results/base.gz",
# "scenarios/infer_multiple_rates.js",
# ]
# # k8s-test-script
# args: [
# "--summary-export",
Expand All @@ -61,8 +70,30 @@ spec:
# "scenarios/core2_qa_control_plane_ops.js",
# ]
env:
- name: REQUEST_RATE
value: "20"
# REQUEST_RATES allows you to specify multiple target RPS values, and is
# used in the infer_multiple_rates scenario. The duration over which
# each rate is maintained is controlled via the value at the same index
# in the CONSTANT_RATE_DURATIONS_SECONDS variable. Rather than a sudden
# change in RPS, additional stages lasting 1/3rd of the duration over
# which the RPS is held constant are automatically introduced, and
# perform either a linear ramp-up or ramp-down to the next target RPS.
# - name: REQUEST_RATES
# value: "10,70,10,50,20"
- name: CONSTANT_RATE_DURATION_SECONDS
value: "60"
# CONSTANT_RATE_DURATIONS_SECONDS is used in the infer_multiple_rates
# scenario. It specifies the duration in seconds for which each target
# RPS value is maintained. If the sum of the values here is S, the total
# duration of the test will be approximately S + S/3 (the extra S/3 comes
# from the added ramp-up/ramp-down stages, each rounded up to a whole second)
# - name: CONSTANT_RATE_DURATIONS_SECONDS
# value: "120,120,400,120,400"
- name: USE_KUBE_CONTROL_PLANE
value: "true"
# - name: SKIP_UNLOAD_MODEL
# value: "true"
- name: SCHEDULER_ENDPOINT
value: "${SCHEDULER_ENDPOINT}:9004"
- name: INFER_HTTP_ITERATIONS
Expand Down
79 changes: 79 additions & 0 deletions tests/k6/scenarios/infer_multiple_rates.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { getConfig } from '../components/settings.js'
import { doInfer, setupBase, teardownBase, getVersionSuffix, applyModelReplicaChange } from '../components/utils.js'
import { vu } from 'k6/execution';

// Thresholds declared on tagged sub-metrics.
// NOTE(review): these filter on `scenario:default`, while the only scenario
// declared below is named `ramping_request_rates` -- confirm the tag value
// is intentional.
const scenarioThresholds = {
  'http_req_duration{scenario:default}': [`max>=0`],
  'http_reqs{scenario:default}': [],
  'grpc_req_duration{scenario:default}': [`max>=0`],
  'data_received{scenario:default}': [],
  'data_sent{scenario:default}': [],
}

// Arrival-rate scenario whose stages (target rate + duration) are taken from
// the `rateStages` entry of the settings config.
const rampingRequestRates = {
  startTime: '0s',
  executor: 'ramping-arrival-rate',
  startRate: 5,
  timeUnit: '1s',
  preAllocatedVUs: 50, // size of the initial pool of VUs
  maxVUs: 1000, // upper bound if the pre-allocated VUs are not enough
  stages: getConfig().rateStages,
}

// k6 options for this test script.
export const options = {
  thresholds: scenarioThresholds,
  scenarios: {
    ramping_request_rates: rampingRequestRates,
  },
  setupTimeout: '6000s',
  teardownTimeout: '6000s',
};

// Setup step: loads the config, runs setupBase with it, logs the rate stages
// of a freshly loaded config, and returns the config loaded first.
export function setup() {
  const cfg = getConfig()
  setupBase(cfg)

  const { rateStages } = getConfig()
  console.log("rate stages:", rateStages)

  return cfg
}

// Iteration body: issues one inference request against a randomly chosen
// model and, on the first VU only, optionally applies a model replica change.
//
// config: the settings object returned by setup().
// Throws an Error when neither REST nor gRPC inference is enabled.
export default function (config) {
  // model types that have a non-zero model count are eligible for selection
  const typeCount = config.modelType.length
  const eligibleTypes = Array.from({ length: typeCount }, (_, i) => i)
    .filter(i => config.maxNumModels[i] !== 0)

  // pick a model type, then a model id within that type, uniformly at random
  const typeIdx = eligibleTypes[Math.floor(Math.random() * eligibleTypes.length)]
  const modelNumber = Math.floor(Math.random() * config.maxNumModels[typeIdx])
  const modelName = config.modelNamePrefix[typeIdx] + modelNumber.toString()
  const versionedModelName = modelName + getVersionSuffix(config.isSchedulerProxy) // first version

  const restOn = Number(config.inferHttpIterations)
  const grpcOn = Number(config.inferGrpcIterations)
  if (!restOn && !grpcOn) {
    throw new Error('Both REST and GRPC protocols are disabled!')
  }

  // with both protocols enabled choose one randomly, otherwise use the
  // enabled one; `true` selects rest, `false` selects grpc
  const viaRest = (restOn && grpcOn) ? Math.random() > 0.5 : Boolean(restOn)
  doInfer(modelName, versionedModelName, config, viaRest, typeIdx)

  // for simplicity we only change model replicas in the first VU
  if (vu.idInTest == 1 && config.enableModelReplicaChange) {
    applyModelReplicaChange(config)
  }
}

// Teardown step: forwards `config` unchanged to teardownBase.
// Presumably k6 passes in the value returned by setup() -- confirm against
// the k6 test lifecycle documentation.
export function teardown(config) {
teardownBase(config)
}

0 comments on commit 373df43

Please sign in to comment.