#!/bin/bash
# Copyright (c) 2017-2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Description of the test:
# This test takes a number of time measurements through the complete
# launch/shutdown cycle of a single container.
# From those measurements it derives a number of time measures, such as:
# - time to payload execution
# - time to get to VM kernel
# - time in VM kernel boot
# - time to quit
# - total time (from launch to finished)
#
# Note, the <image> used for this test must support the full 'date' command
# syntax - the date from busybox for instance *does not* support this, so
# will not work with this test.
#
# Note, this test launches a single container at a time and waits for it to
# quit - thus, it measures times for the 'first container' only. It does not
# look for scalability slowdowns as the number of running containers
# increases, for instance - that is handled in other tests.
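#
# Example invocation (illustrative image reference and count):
#   ./launch_times.sh -i "docker.io/library/ubuntu:latest" -n 20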
set -e
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"
# Calculating the kernel time from dmesg stamps only really works for VM
# based runtimes - we dynamically enable it if we find we are using a known
# VM runtime
CALCULATE_KERNEL=
REQUIRED_CMDS=("bc" "awk")
# Set the number of decimal digits kept after the decimal point
# when representing calculation results
CALC_SCALE=4
# The date command format we use to ensure we capture the ns timings
# Note the no-0-padding - 0 padding the results breaks bc in some cases
DATECMD="date -u +%-s:%-N"
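# Illustrative output of the above: '1712000000:43370217' - epoch seconds and
# unpadded nanoseconds, which sn_to_ns() below folds into a single ns value.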
# The modern Linux RNG is extremely fast at generating entropy on demand,
# so it no longer needs a large store of entropy; a value of 256 was found
# to work well with common cryptographic algorithms.
entropy_level="256"
# Tracks the number of iterations performed
num_iters=0
# Maximum number of repetitions allowed for failed runs
MAX_REPETITIONS=3
# The individual results are stored in an array
declare -a total_result_ds
declare -a to_workload_ds
declare -a in_kernel_ds
declare -a to_kernel_ds
declare -a to_quit_ds
# data_is_valid: 0 means the data is valid, 1 means it is not
data_is_valid=0
check_entropy_level() {
retries="10"
for i in $(seq 1 "$retries"); do
if [ $(cat /proc/sys/kernel/random/entropy_avail) -ge ${entropy_level} ]; then
break;
fi
sleep 1
done
if [ $(cat /proc/sys/kernel/random/entropy_avail) -lt ${entropy_level} ]; then
die "Not enough entropy level to run this test"
fi
}
# convert a 'seconds:nanoseconds' string into nanoseconds
sn_to_ns() {
# Strip leading zeros, otherwise the numbers would be interpreted as octal
s=$(echo ${1%:*} | sed 's/^0*//g')
ns=$(echo ${1##*:} | sed 's/^0*//g')
# use shell magic to strip out the 's' and 'ns' fields and print
# them as a 0-padded ns string...
printf "%d%09d" ${s} ${ns}
}
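# Illustrative example: sn_to_ns "10:4567" prints "10000004567"
# (10 seconds and 4567 ns, with the ns field re-padded to 9 digits).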
# convert 'nanoseconds' (since epoch) into a 'float' seconds
ns_to_s() {
printf "%.0${CALC_SCALE}f" $(bc <<< "scale=$CALC_SCALE; $1 / 1000000000")
}
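# Illustrative example: with CALC_SCALE=4, ns_to_s 1234567890 prints "1.2345".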
run_workload() {
# L_CALC_SCALE sets a generous number of decimal digits
# after the decimal point for 'bc' to use when estimating
# the kernel period
L_CALC_SCALE=13
local CONTAINER_NAME="kata_launch_times_$(( $RANDOM % 1000 + 1))"
start_time=$($DATECMD)
# Check entropy level of the host
check_entropy_level
# Run the image and command and capture the results into an array...
declare workload_result
readarray -n 0 workload_result < <(sudo -E "${CTR_EXE}" run --rm --runtime ${CTR_RUNTIME} ${IMAGE} ${CONTAINER_NAME} bash -c "$DATECMD $DMESGCMD")
end_time=$($DATECMD)
# Delay this calculation until after we have run - do not want
# to measure it in the results
start_time=$(sn_to_ns $start_time)
end_time=$(sn_to_ns $end_time)
# Extract the 'date' info from the first line of the log
# This script assumes the VM clock is in sync with the host clock...
workload_time=${workload_result[0]}
workload_time=$(echo $workload_time | tr -d '\r')
workload_time=$(sn_to_ns $workload_time)
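# Illustrative timeline relating the three timestamps captured above:
#   start_time --(workload_period)--> workload_time --(shutdown_period)--> end_time
#   |<---------------------------- total_period ---------------------------->|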
# How long did the whole launch/quit take
total_period=$((end_time-start_time))
# How long did it take to get to the workload
workload_period=$((workload_time-start_time))
# How long did it take to quit
shutdown_period=$((end_time-workload_time))
if [ -n "$CALCULATE_KERNEL" ]; then
# Grab the last kernel dmesg time
# In our case, we need to find the last real kernel line before
# the systemd lines begin. The last
# 'Freeing unused kernel' line is a reasonable
# 'last in kernel line' to look for.
# We make a presumption here that as we are in a cold-boot VM
# kernel, the first dmesg is at '0 seconds', so the timestamp
# of that last line is the length of time in the kernel.
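# An illustrative 'last in kernel' dmesg line looks like:
#   [    0.523423] Freeing unused kernel image (initmem) memory: 2588K
# where the bracketed timestamp is the value extracted below.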
kernel_last_line=$( (fgrep "Freeing unused kernel" <<- EOF
${workload_result[@]}
EOF
) | tail -1 )
if [ -z "$kernel_last_line" ]; then
echo "No kernel last line"
for l in "${workload_result[@]}"; do
echo ">: [$l]"
done
die "No kernel last line"
fi
kernel_period=$(echo $kernel_last_line | awk '{print $2}' | tr -d "]")
# And we can then work out how much time it took to get to the kernel
to_kernel_period=$(printf "%f" $(bc <<<"scale=$L_CALC_SCALE; $(ns_to_s $workload_period) - $kernel_period"))
else
kernel_period="0.0"
to_kernel_period="0.0"
fi
total_result="$(ns_to_s $total_period)"
to_workload="$(ns_to_s $workload_period)"
in_kernel=$kernel_period
to_kernel=$to_kernel_period
to_quit=$(ns_to_s $shutdown_period)
tr_is_neg=$(echo $total_result'<='0.0 | bc -l)
tw_is_neg=$(echo $to_workload'<='0.0 | bc -l)
ik_is_neg=$(echo $in_kernel'<='0.0 | bc -l)
tk_is_neg=$(echo $to_kernel'<='0.0 | bc -l)
tq_is_neg=$(echo $to_quit'<='0.0 | bc -l)
data_is_valid=0
if [ $tr_is_neg -eq 1 ] || [ $tw_is_neg -eq 1 ] || [ $ik_is_neg -eq 1 ] || [ $tk_is_neg -eq 1 ] || [ $tq_is_neg -eq 1 ]; then
data_is_valid=1
else
# Insert results individually
total_result_ds+=($total_result)
to_workload_ds+=($to_workload)
in_kernel_ds+=($in_kernel)
to_kernel_ds+=($to_kernel)
to_quit_ds+=($to_quit)
fi
((num_iters+=1))
# If we are doing an (optional) scaling test, then we launch a permanent container
# between each of our 'test' containers. The aim is to see whether our launch times
# scale linearly with the number of running containers or not.
if [ -n "$SCALING" ]; then
# Use a unique name for each permanent container to avoid ID collisions
sudo -E "${CTR_EXE}" run --runtime=${CTR_RUNTIME} -d ${IMAGE} "test_${num_iters}" bash -c "tail -f /dev/null"
fi
}
# Writes a JSON entry with the measurement
# results for each execution
write_individual_results() {
for i in "${!total_result_ds[@]}"; do
local json="$(cat << EOF
{
"total": {
"Result": ${total_result_ds[i]},
"Units": "s"
},
"to-workload": {
"Result": ${to_workload_ds[i]},
"Units": "s"
},
"in-kernel": {
"Result": ${in_kernel_ds[i]},
"Units": "s"
},
"to-kernel": {
"Result": ${to_kernel_ds[i]},
"Units": "s"
},
"to-quit": {
"Result": ${to_quit_ds[i]},
"Units": "s"
}
}
EOF
)"
metrics_json_add_array_element "$json"
done
}
init () {
TEST_ARGS="image=${IMAGE} runtime=${CTR_RUNTIME} units=seconds"
# We set the generic name here, but we save the different time results separately,
# and append the actual detail to the name at the time of saving...
TEST_NAME="boot times"
# If we are scaling, note that in the name
[ -n "$SCALING" ] && TEST_NAME="${TEST_NAME} scaling"
echo "Executing test: ${TEST_NAME} ${TEST_ARGS}"
check_cmds "${REQUIRED_CMDS[@]}"
# For non-VM runtimes, we don't use the output of dmesg, and
# we have seen it cause some test instabilities, so do not invoke
# it if not needed.
if [ "${CTR_RUNTIME}" == "io.containerd.runc.v2" ]; then
DMESGCMD=""
else
CALCULATE_KERNEL=1
DMESGCMD="; dmesg"
fi
# Start from a fairly clean environment
init_env
check_images "$IMAGE"
}
# Computes the average of the data
calc_avg_array() {
data=("$@")
avg=0
LSCALE=6
size="${#data[@]}"
[ -z "$data" ] && die "List of results was not passed to the calc_avg_array() function when trying to calculate the average result."
[ $size -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the average result."
sum=$(IFS='+'; echo "scale=4; ${data[*]}" | bc)
avg=$(echo "scale=$LSCALE; $sum / $size" | bc)
printf "%.0${CALC_SCALE}f" $avg
}
# Computes the standard deviation of the data
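# Uses the population-variance computational formula:
#   var = (sum(x_i^2) - (sum(x_i))^2 / n) / n,   sd = sqrt(var)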
calc_sd_array() {
data=("$@")
sum_sqr_n=0
size=${#data[@]}
# LSCALE is the scale used for calculations in the middle
# CALC_SCALE is the scale used for the result
LSCALE=13
CALC_SCALE=6
[ -z "$data" ] && die "List results was not passed to the calc_sd_result() function when trying to calculate the standard deviation result."
[ $size -eq 0 ] && die "Division by zero: The number of items is 0 when trying to calculate the standard deviation result."
# [1] sum data
sum_data=$(IFS='+'; echo "scale=$LSCALE; ${data[*]}" | bc)
# [2] square the sum of data
pow_2_sum_data=$(echo "scale=$LSCALE; $sum_data ^ 2" | bc)
# [3] divide the square of data by the num of items
div_sqr_n=$(echo "scale=$LSCALE; $pow_2_sum_data / $size" | bc)
# [4] Sum of the sqr of each item
for i in "${data[@]}"; do
sqr_n=$(echo "scale=$LSCALE; $i ^ 2" | bc)
sum_sqr_n=$(echo "scale=$LSCALE; $sqr_n + $sum_sqr_n" | bc)
done
# subtract [3] from [4]
subs=$(echo "scale=$LSCALE; $sum_sqr_n - $div_sqr_n" | bc)
# get variance
var=$(echo "scale=$LSCALE; $subs / $size" | bc)
# get standard deviation
sd=$(echo "scale=$LSCALE; sqrt($var)" | bc)
# if sd is zero, limit the decimal scale to 1 digit
sd_is_zero=$(echo $sd'=='0.0 | bc -l)
[ $sd_is_zero -eq 1 ] && CALC_SCALE=1
printf "%.0${CALC_SCALE}f" $sd
}
# Computes the Coefficient of variation.
# The result is given as percentage.
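#   cov = (sd / mean) * 100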
calc_cov_array() {
sd=$1
mean=$2
# LSCALE is used to keep more decimal digits than usual in the CoV estimation.
# CALC_SCALE is the scale used to return the result.
LSCALE=13
CALC_SCALE=6
mean_is_zero=$(echo $mean'=='0.0 | bc -l)
[ -z "$sd" ] && die "Standard deviation was not passed to the calc_cov_array() function when trying to calculate the CoV result."
[ -z "$mean" ] && die "Mean was not passed to the calc_cov_array() function when trying to calculate the CoV result."
[ $mean_is_zero -eq 1 ] && die "Division by zero: Mean value passed is 0 when trying to get CoV result."
cov=$(echo "scale=$LSCALE; $sd / $mean" | bc)
cov=$(echo "scale=$LSCALE; $cov * 100" | bc)
# if cov is zero, limit the decimal scale to 1 digit
cov_is_zero=$(echo $cov'=='0.0 | bc -l)
[ $cov_is_zero -eq 1 ] && CALC_SCALE=1
printf "%.0${CALC_SCALE}f" $cov
}
# Writes a JSON with the statistics results
# for each launch time metric
write_stats_results() {
size="${#total_result_ds[@]}"
avg_total_result=$(calc_avg_array "${total_result_ds[@]}")
avg_to_workload=$(calc_avg_array "${to_workload_ds[@]}")
avg_in_kernel=$(calc_avg_array "${in_kernel_ds[@]}")
avg_to_kernel=$(calc_avg_array "${to_kernel_ds[@]}")
avg_to_quit=$(calc_avg_array "${to_quit_ds[@]}")
sd_total_result=$(calc_sd_array "${total_result_ds[@]}")
sd_to_workload=$(calc_sd_array "${to_workload_ds[@]}")
sd_in_kernel=$(calc_sd_array "${in_kernel_ds[@]}")
sd_to_kernel=$(calc_sd_array "${to_kernel_ds[@]}")
sd_to_quit=$(calc_sd_array "${to_quit_ds[@]}")
cov_total_result=$(calc_cov_array ${sd_total_result} ${avg_total_result})
cov_to_workload=$(calc_cov_array ${sd_to_workload} ${avg_to_workload})
cov_in_kernel=$(calc_cov_array ${sd_in_kernel} ${avg_in_kernel})
cov_to_kernel=$(calc_cov_array ${sd_to_kernel} ${avg_to_kernel})
cov_to_quit=$(calc_cov_array ${sd_to_quit} ${avg_to_quit})
local json="$(cat << EOF
{
"size": $size,
"total": {
"avg": $avg_total_result,
"sd": $sd_total_result,
"cov": $cov_total_result
},
"to-workload": {
"avg": $avg_to_workload,
"sd": $sd_to_workload,
"cov": $cov_to_workload
},
"in-kernel": {
"avg": $avg_in_kernel,
"sd": $sd_in_kernel,
"cov": $cov_in_kernel
},
"to-kernel_avg": {
"avg": $avg_to_kernel,
"sd": $sd_to_kernel,
"cov": $cov_to_kernel
},
"to-quit": {
"avg": $avg_to_quit,
"sd": $sd_to_quit,
"cov": $cov_to_quit
}
}
EOF
)"
metrics_json_add_array_element "$json"
}
help() {
usage=$(cat << EOF
Usage: $0 [-h] [options]
Description:
This script takes time measurements for different
stages of a boot/run/rm cycle
Options:
-h, Help
-i <name>, Image name (mandatory)
-n <n>, Number of containers to run (mandatory)
-s, Enable scaling (keep containers running)
EOF
)
echo "$usage"
}
main() {
local OPTIND
while getopts "dhi:n:s" opt;do
case ${opt} in
h)
help
exit 0;
;;
i)
IMAGE="${OPTARG}"
;;
n)
TIMES="${OPTARG}"
;;
s)
SCALING=true
;;
?)
# parse failure
help
die "Failed to parse arguments"
;;
esac
done
shift $((OPTIND-1))
[ -z "$IMAGE" ] && help && die "Mandatory IMAGE name not supplied"
[ -z "$TIMES" ] && help && die "Mandatory nunmber of containers not supplied"
# Although this is mandatory, the 'lib/common.bash' environment can set
# it, so we may not fail if it is not set on the command line...
[ -z "$RUNTIME" ] && help && die "Mandatory runtime argument not supplied"
init
j=0
max_reps=$MAX_REPETITIONS
while [ $j -lt $TIMES ]; do
echo " run $num_iters"
run_workload
if [ $data_is_valid -eq 0 ]; then
j=$(( j + 1 ))
# if valid result then reset 'max_reps' to initial value
max_reps=$MAX_REPETITIONS
continue
fi
echo "Skipping run due to invalid result"
((max_reps-=1))
if [ $max_reps -lt 0 ]; then
die "Max. num of repetitions reached for run: $j"
fi
done
metrics_json_init
metrics_json_start_array
write_stats_results
metrics_json_end_array "Statistics"
metrics_json_start_array
write_individual_results
metrics_json_end_array "Results"
metrics_json_save
clean_env_ctr
}
main "$@"