feat(k6): add scenario with multiple stages ramping up/down RPS (#6031)

The added load test scenario allows one to configure an arbitrary number of stages, each consisting of a linear ramp-up/ramp-down to the desired requests per second (RPS) followed by a hold/plateau time. Within each stage, the duration for which the inference RPS is held constant is configured via one element of the `CONSTANT_RATE_DURATIONS_SECONDS` environment variable (a list of comma-separated values), with the ramp-up/ramp-down preceding it lasting one third of the hold time.
SeldonIO · Dec 12, 2024 · 373df43 · 373df43
1 parent 34cf313
commit 373df43
Show file tree

Hide file tree

Showing 3 changed files with 156 additions and 1 deletion.
diff --git a/tests/k6/components/settings.js b/tests/k6/components/settings.js
@@ -238,18 +238,60 @@ function doWarmup() {
 
 function requestRate() {
     if (__ENV.REQUEST_RATE) {
-        return __ENV.REQUEST_RATE
+        return parseInt(__ENV.REQUEST_RATE)
     }
     return 10
 }
 
+function requestRates() {
+    if (__ENV.REQUEST_RATES) {
+        return __ENV.REQUEST_RATES.split(",").map( s => parseInt(s))
+    }
+    return [requestRate()]
+}
+
+function rateStages() {
+    if (__ENV.REQUEST_RATES) {
+        var stages = []
+        var durations = constantRateDurationsSeconds()
+        var rates = requestRates()
+        for (var i = 0; i < rates.length; i++) {
+            // ramp up (1/3 rd of the duration)
+            stages.push({target: rates[i], duration: Math.ceil(durations[i]/3).toString()+'s'})
+            // hold
+            stages.push({target: rates[i], duration: durations[i].toString()+'s'})
+        }
+        return stages
+    }
+    return [{target: requestRate(), duration: constantRateDurationSeconds().toString()+'s'}]
+}
+
 function constantRateDurationSeconds() {
     if (__ENV.CONSTANT_RATE_DURATION_SECONDS) {
         return __ENV.CONSTANT_RATE_DURATION_SECONDS
     }
     return 30
 }
 
+function constantRateDurationsSeconds() {
+    if (__ENV.CONSTANT_RATE_DURATIONS_SECONDS) {
+        var durations = __ENV.CONSTANT_RATE_DURATIONS_SECONDS.split(",").map( s => parseInt(s))
+        if (durations.length > requestRates().length) {
+            return durations.slice(0, requestRates().length)
+        } else if (durations.length < requestRates().length) {
+            // pad with the last value
+            const last = durations[durations.length - 1]
+            for (var i = durations.length; i < requestRates().length; i++) {
+                durations.push(last)
+            }
+        } else {
+            return durations
+        }
+    }
+    const reqNumberOfStages = requestRates().length
+    return new Array(reqNumberOfStages).fill(constantRateDurationSeconds()/reqNumberOfStages)
+}
+
 function podNamespace() {
     if (__ENV.NAMESPACE) {
         return __ENV.NAMESPACE
@@ -349,7 +391,10 @@ export function getConfig() {
         "inferType" : inferType(),
         "doWarmup": doWarmup(),
         "requestRate": requestRate(),
+        "requestRates": requestRates(),
         "constantRateDurationSeconds": constantRateDurationSeconds(),
+        "constantRateDurationsSeconds": constantRateDurationsSeconds(),
+        "rateStages": rateStages(),
         "modelReplicas": modelReplicas(),
         "maxModelReplicas": maxModelReplicas(),
         "namespace":  podNamespace(),

diff --git a/tests/k6/configs/k8s/base/k6.yaml b/tests/k6/configs/k8s/base/k6.yaml
@@ -36,6 +36,15 @@ spec:
         #   "csv=results/base.gz",
         #   "scenarios/infer_constant_rate.js",
         #   ]
+        # # infer_multiple_rates
+        # args: [
+        #   "--no-teardown",
+        #   "--summary-export",
+        #   "results/base.json",
+        #   "--out",
+        #   "csv=results/base.gz",
+        #   "scenarios/infer_multiple_rates.js",
+        #   ]
         # # k8s-test-script
         # args: [
         #   "--summary-export",
@@ -61,8 +70,30 @@ spec:
         #   "scenarios/core2_qa_control_plane_ops.js",
         #   ]
         env:
+        - name: REQUEST_RATE
+          value: "20"
+        # REQUEST_RATES allows you to specify multiple target RPS values, and is
+        # used in the infer_multiple_rates scenario. The duration over which
+        # each rate is maintained is controlled via the value at the same index
+        # in the CONSTANT_RATE_DURATIONS_SECONDS variable. Rather than a sudden
+        # change in RPS, additional stages lasting 1/3rd of the duration over
+        # which the RPS is held constant are automatically introduced, and
+        # perform either a linear ramp-up or ramp-down to the next target RPS.
+        # - name: REQUEST_RATES
+        #   value: "10,70,10,50,20"
+        - name: CONSTANT_RATE_DURATION_SECONDS
+          value: "60"
+        # CONSTANT_RATE_DURATIONS_SECONDS is used in the infer_multiple_rates
+        # scenario. It specifies the duration in seconds for which each target
+        # RPS value is maintained. If the sum of the values here is S, the total
+        # duration of the test will be S + S/3 (with the latter due to the added
+        # ramp-up/ramp-down stages)
+        # - name: CONSTANT_RATE_DURATIONS_SECONDS
+        #   value: "120,120,400,120,400"
         - name: USE_KUBE_CONTROL_PLANE
           value: "true"
+        # - name: SKIP_UNLOAD_MODEL
+        #   value: "true"
         - name: SCHEDULER_ENDPOINT
           value: "${SCHEDULER_ENDPOINT}:9004"
         - name: INFER_HTTP_ITERATIONS

diff --git a/tests/k6/scenarios/infer_multiple_rates.js b/tests/k6/scenarios/infer_multiple_rates.js
@@ -0,0 +1,79 @@
+import { getConfig } from '../components/settings.js'
+import { doInfer, setupBase, teardownBase, getVersionSuffix, applyModelReplicaChange } from '../components/utils.js'
+import { vu } from 'k6/execution';
+
+export const options = { // k6 options for the multi-stage request-rate scenario
+    thresholds: { // NOTE(review): these sub-metrics filter on scenario "default", but the only scenario declared below is "ramping_request_rates" — confirm the tag is intended
+        'http_req_duration{scenario:default}': [`max>=0`],
+        'http_reqs{scenario:default}': [],
+        'grpc_req_duration{scenario:default}': [`max>=0`],
+        'data_received{scenario:default}': [],
+        'data_sent{scenario:default}': [],
+    },
+    scenarios: {
+        ramping_request_rates: {
+            startTime: '0s',
+            executor: 'ramping-arrival-rate',
+            startRate: 5,
+            timeUnit: '1s',
+            preAllocatedVUs: 50, // how large the initial pool of VUs would be
+            maxVUs: 1000, // if the preAllocatedVUs are not enough, we can initialize more
+            stages: getConfig().rateStages, // stage list taken from the "rateStages" entry of the config
+        },
+    },
+    setupTimeout: '6000s',
+    teardownTimeout: '6000s',
+};
+
+export function setup() { // builds the config, passes it to setupBase, logs the stages and returns the config
+    const config = getConfig()
+
+    setupBase(config)
+    console.log("rate stages:", getConfig().rateStages) // logs the stage list from a fresh getConfig() call
+
+    return config
+}
+
+export default function (config) {
+    const numModelTypes = config.modelType.length
+
+    let candidateIdxs = []
+    for (let i = 0; i < numModelTypes; i++) {
+        if (config.maxNumModels[i] !== 0)
+            candidateIdxs.push(i)
+    }
+    const numCandidates = candidateIdxs.length
+    var idx = candidateIdxs[Math.floor(Math.random() * numCandidates)]
+
+    const modelId = Math.floor(Math.random() * config.maxNumModels[idx])
+    const modelName = config.modelNamePrefix[idx] + modelId.toString()
+
+    const modelNameWithVersion = modelName + getVersionSuffix(config.isSchedulerProxy)  // first version
+
+    var rest_enabled = Number(config.inferHttpIterations)
+    var grpc_enabled = Number(config.inferGrpcIterations)
+    if (rest_enabled && grpc_enabled) {
+        // if both protocols are enabled, choose one randomly
+        const rand = Math.random()
+        if (rand > 0.5) {
+            doInfer(modelName, modelNameWithVersion, config, true, idx) // rest
+        } else {
+            doInfer(modelName, modelNameWithVersion, config, false, idx) // grpc
+        }
+    } else if (rest_enabled) {
+        doInfer(modelName, modelNameWithVersion, config, true, idx)
+    } else if (grpc_enabled) {
+        doInfer(modelName, modelNameWithVersion, config, false, idx)
+    } else {
+        throw new Error('Both REST and GRPC protocols are disabled!')
+    }
+
+    // for simplicity we only change model replicas in the first VU
+    if (vu.idInTest == 1 && config.enableModelReplicaChange) {
+        applyModelReplicaChange(config)
+    }
+}
+
+export function teardown(config) { // delegates to teardownBase with the given config
+    teardownBase(config)
+}