Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: system test parallelization: two-pass approach #23275

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ alt_build_task:
only_if: &no_rhel_release |
$CIRRUS_BRANCH !=~ 'v[0-9\.]+-rhel' &&
$CIRRUS_BASE_BRANCH !=~ 'v[0-9\.]+-rhel'
skip: $CI == $CI
env:
<<: *stdenvars
TEST_FLAVOR: "altbuild"
Expand Down Expand Up @@ -261,6 +262,7 @@ osx_alt_build_task:
alias: osx_alt_build
# Docs: ./contrib/cirrus/CIModes.md
only_if: *no_rhel_release # RHEL never releases podman mac installer binary
skip: $CI == $CI
persistent_worker: &mac_pw
labels:
os: darwin
Expand Down Expand Up @@ -441,6 +443,7 @@ win_installer_task:
CONTAINERS_MACHINE_PROVIDER: 'hyperv'
alias: win_installer
only_if: *no_rhel_release
skip: $CI == $CI
depends_on: *build
ec2_instance: &windows
image: "${WINDOWS_AMI}"
Expand Down Expand Up @@ -627,6 +630,7 @@ local_integration_test_task: &local_integration_test_task
changesInclude('.cirrus.yml', 'Makefile', 'contrib/cirrus/**', 'vendor/**', 'test/tools/**', 'test/registries*.conf', 'hack/**', 'version/rawversion/*') ||
changesInclude('test/e2e/**', 'test/utils/**') ||
(changesInclude('**/*.go', '**/*.c', '**/*.h') && !changesIncludeOnly('test/**', 'pkg/machine/e2e/**'))
skip: $CI == $CI
depends_on: *build
matrix: *platform_axis
# integration tests scale well with cpu as they are parallelized
Expand All @@ -650,6 +654,7 @@ local_integration_test_task: &local_integration_test_task
remote_integration_test_task:
<<: *local_integration_test_task
alias: remote_integration_test
skip: $CI == $CI
env:
TEST_FLAVOR: int
PODBIN_NAME: remote
Expand All @@ -662,6 +667,7 @@ container_integration_test_task:
alias: container_integration_test
# Docs: ./contrib/cirrus/CIModes.md
only_if: *only_if_int_test
skip: $CI == $CI
depends_on: *build
matrix: &fedora_vm_axis
- env:
Expand Down Expand Up @@ -689,6 +695,7 @@ rootless_integration_test_task:
alias: rootless_integration_test
# Docs: ./contrib/cirrus/CIModes.md
only_if: *only_if_int_test
skip: $CI == $CI
depends_on: *build
matrix: *platform_axis
gce_instance: *fastvm
Expand All @@ -712,6 +719,7 @@ podman_machine_task:
$CIRRUS_CHANGE_TITLE =~ '.*CI:ALL.*' ||
changesInclude('.cirrus.yml', 'Makefile', 'contrib/cirrus/**', 'vendor/**', 'test/tools/**', 'test/registries*.conf', 'hack/**', 'version/rawversion/*') ||
changesInclude('cmd/podman/machine/**', 'pkg/machine/**', '**/*machine*.go')
skip: $CI == $CI
depends_on: *build
ec2_instance:
image: "${VM_IMAGE_NAME}"
Expand All @@ -734,6 +742,7 @@ podman_machine_aarch64_task:
name: *std_name_fmt
alias: podman_machine_aarch64
only_if: *only_if_machine_test
skip: $CI == $CI
depends_on: *build
ec2_instance:
<<: *standard_build_ec2_aarch64
Expand Down Expand Up @@ -764,9 +773,10 @@ podman_machine_windows_task:
# everywhere to do so here it would mean we would need to duplicate the
# full big only_if condition, which is more difficult to maintain, so
# use the skip here.
skip: &skip_rhel_release |
$CIRRUS_BRANCH =~ 'v[0-9\.]+-rhel' ||
$CIRRUS_BASE_BRANCH =~ 'v[0-9\.]+-rhel'
# skip: &skip_rhel_release |
# $CIRRUS_BRANCH =~ 'v[0-9\.]+-rhel' ||
# $CIRRUS_BASE_BRANCH =~ 'v[0-9\.]+-rhel'
skip: $CI == $CI
depends_on: *build
ec2_instance:
<<: *windows
Expand All @@ -792,7 +802,8 @@ podman_machine_mac_task:
name: *std_name_fmt
alias: podman_machine_mac
only_if: *only_if_machine_test
skip: *skip_rhel_release
# skip: *skip_rhel_release
skip: $CI == $CI
depends_on: *build
persistent_worker: *mac_pw
timeout_in: 35m
Expand Down Expand Up @@ -967,6 +978,7 @@ buildah_bud_test_task:
$CIRRUS_CHANGE_TITLE =~ '.*CI:ALL.*' ||
changesInclude('.cirrus.yml', 'Makefile', 'contrib/cirrus/**', 'vendor/**', 'test/tools/**', 'test/registries*.conf', 'hack/**', 'version/rawversion/*') ||
changesInclude('**/*build*.go', 'test/buildah-bud/**')
skip: $CI == $CI
depends_on: *build
env:
<<: *stdenvars
Expand Down Expand Up @@ -996,6 +1008,7 @@ upgrade_test_task:
changesInclude('.cirrus.yml', 'Makefile', 'contrib/cirrus/**', 'vendor/**', 'test/tools/**', 'test/registries*.conf', 'hack/**', 'version/rawversion/*') ||
changesInclude('test/upgrade/**', 'test/system/*.bash') ||
(changesInclude('**/*.go', '**/*.c', '**/*.h') && !changesIncludeOnly('test/**', 'pkg/machine/e2e/**'))
skip: $CI == $CI
depends_on: *build
matrix:
- env:
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,5 @@ require (
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
tags.cncf.io/container-device-interface/specs-go v0.8.0 // indirect
)

replace github.com/nxadm/tail => github.com/Luap99/tail v0.0.0-20240626140224-ad4e60e8be25
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg6
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0=
github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/Luap99/tail v0.0.0-20240626140224-ad4e60e8be25 h1:fz7HD7A+DFIBortMJp4kCr0WqU5FXjQHPkXPMTHOsrw=
github.com/Luap99/tail v0.0.0-20240626140224-ad4e60e8be25/go.mod h1:OTaG3NK980DZzxbRq6lEuzgU+mug70nY11sMd4JXXHc=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/Microsoft/hcsshim v0.12.9 h1:2zJy5KA+l0loz1HzEGqyNnjd3fyZA31ZBCGKacp6lLg=
Expand Down Expand Up @@ -386,8 +388,6 @@ github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
github.com/nxadm/tail v1.4.11/go.mod h1:OTaG3NK980DZzxbRq6lEuzgU+mug70nY11sMd4JXXHc=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg=
Expand Down
10 changes: 8 additions & 2 deletions hack/bats
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ export PODMAN_ROOTLESS_USER=$(id -un)
# Make sure to always check for leaks when running locally
export PODMAN_BATS_LEAK_CHECK=1

# FIXME: scratch directory for per-test output logs; passed to bats
# via --gather-test-outputs-in below.
export BATS_LOGDIR=$(mktemp -d --tmpdir podman-bats-logs.XXXXXXXXX)

# Root
if [[ "$TEST_ROOT" ]]; then
echo "# bats ${bats_opts[*]} ${bats_filter[*]} $TESTS"
Expand All @@ -140,15 +143,18 @@ if [[ "$TEST_ROOT" ]]; then
--preserve-env=OCI_RUNTIME \
--preserve-env=CONTAINERS_HELPER_BINARY_DIR \
--preserve-env=PODMAN_ROOTLESS_USER \
bats "${bats_opts[@]}" "${bats_filter[@]}" $TESTS
--preserve-env=BATS_LOGDIR \
bats "${bats_opts[@]}" "${bats_filter[@]}" \
--gather-test-outputs-in "$BATS_LOGDIR/root" $TESTS
rc=$?
fi

# Rootless. (Only if we're not already root)
if [[ "$TEST_ROOTLESS" && "$(id -u)" != 0 ]]; then
echo "--------------------------------------------------"
echo "\$ bats ${bats_opts[*]} ${bats_filter[*]} $TESTS"
bats "${bats_opts[@]}" "${bats_filter[@]}" $TESTS
bats "${bats_opts[@]}" "${bats_filter[@]}" \
--gather-test-outputs-in "$BATS_LOGDIR/rootless" $TESTS
rc=$((rc | $?))
fi

Expand Down
3 changes: 2 additions & 1 deletion test/system/030-run.bats
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,7 @@ json-file | f
# exactly 10 seconds. Give it some leeway.
delta_t=$(( $t1 - $t0 ))
assert "$delta_t" -gt 1 "podman stop: ran too quickly!"
# FIXME: can fail under load, when podman stop can take 7 seconds
assert "$delta_t" -le 6 "podman stop: took too long"

run_podman rm $cname
Expand Down Expand Up @@ -890,7 +891,7 @@ EOF

# bats test_tags=ci:parallel
@test "podman run --hostuser tests" {
skip_if_not_rootless "test whether hostuser is successfully added"
skip_if_not_rootless "--hostuser is only meaningful when rootless"
user=$(id -un)
run_podman 1 run --rm $IMAGE grep $user /etc/passwd
run_podman run --hostuser=$user --rm $IMAGE grep $user /etc/passwd
Expand Down
8 changes: 8 additions & 0 deletions test/system/035-logs.bats
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,14 @@ function _log_test_follow_since() {
run_podman ${events_backend} run --log-driver=$driver --name $cname -d $IMAGE \
sh -c "sleep 1; while :; do echo $content && sleep 1; done"

# FIXME FIXME: TEMPORARY! For debugging a bug I don't even remember any more
logpath=
if [[ "$driver" = "k8s-file" ]]; then
run_podman inspect --format '{{.HostConfig.LogConfig.Path}}' $cname
logpath="$output"
fi
# FIXME FIXME

# sleep is required to make sure the podman event backend no longer sees the start event in the log.
# This value must be greater than or equal to the value given in --since below
sleep 0.2
Expand Down
1 change: 1 addition & 0 deletions test/system/080-pause.bats
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ load helpers

# There should be a 3-4 second gap, *maybe* 5. Never 1 or 2, that
# would imply that the container never paused.
# FIXME: under high load, the gap can be 7, which the pattern below rejects
is "$max_delta" "[3456]" "delta t between paused and restarted"

run_podman rm -t 0 -f $cname
Expand Down
6 changes: 6 additions & 0 deletions test/system/220-healthcheck.bats
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ Log[-1].ExitCode | 0
Log[-1].Output | \"Life is Good on stdout\\\nLife is Good on stderr\\\n\"
" "$current_time" "healthy"

# FIXME FIXME FIXME: 20240918: there's a race here, wherein _check_health()
# can see a "healthy" that comes from before 'touch uh-oh'. One way to
# fix that might be to add another arg to _check_health, 'FailingStreak'.
# That doesn't show up in podman-events, though, so we'd have to
# run podman-inspect in a loop, and that introduces its own races.
# I don't have a good answer here. See log.103
current_time=$(date --iso-8601=ns)
# Force a failure
run_podman exec $ctrname touch /uh-oh
Expand Down
1 change: 1 addition & 0 deletions test/system/250-systemd.bats
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
run_podman exec $cname touch /uh-oh

# healthcheck should now fail, with exit status 1 and 'unhealthy' output
# FIXME: race: on high load, we can get "Error: no container with ID xxxx"
run_podman 1 healthcheck run $cname
is "$output" "unhealthy" "output from 'podman healthcheck run'"

Expand Down
14 changes: 11 additions & 3 deletions test/system/255-auto-update.bats
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@ function setup() {
}

function teardown() {
ls -l /run/netns | sed -e "s/^/# teardown /" >&3
if [[ -e $SNAME_FILE ]]; then
while read line; do
if [[ "$line" =~ "podman-auto-update" ]]; then
echo "Stop timer: $line.timer"
systemctl stop $line.timer
systemctl disable $line.timer
else
ls -l /run/netns | sed -e "s/^/# before stop $line /" >&3
systemctl stop $line
ls -l /run/netns | sed -e "s/^/# after stop $line /" >&3
fi
rm -f $UNIT_DIR/$line.{service,timer}
done < $SNAME_FILE
Expand Down Expand Up @@ -66,12 +69,12 @@ function generate_service() {

# Unless specified, set a default command.
if [[ -z "$command" ]]; then
command="top -d 120"
command="top -d $((100 + BATS_SUITE_TEST_NUMBER))"
fi

# Container name. Include the autoupdate type, to make debugging easier.
# IMPORTANT: variable 'cname' is passed (out of scope) up to caller!
cname=c_${autoupdate//\'/}_$(random_string)
cname="c-$(safename)-${autoupdate//\'/}-$(random_string)"
target_img="quay.io/libpod/$target_img_basename:latest"
if [[ -n "$7" ]]; then
target_img="$7"
Expand Down Expand Up @@ -172,7 +175,7 @@ function _confirm_update() {

# This test can fail in dev. environment because of SELinux.
# quick fix: chcon -t container_runtime_exec_t ./bin/podman
@test "podman auto-update - label io.containers.autoupdate=image" {
@test "podman auto-update - label io.containers.autoupdate=imagexxxxxxx" {
since=$(date --iso-8601=seconds)
run_podman auto-update
is "$output" ""
Expand Down Expand Up @@ -214,6 +217,11 @@ function _confirm_update() {
run_podman container inspect --format "{{.ID}}" $ctr_child
run_podman container inspect --format "{{.State.Status}}" $ctr_child
is "$output" "running" "child container is in running state"

ls -l /run/netns | sed -e 's/^/# before container rm /' >&3
run_podman container rm -f -t0 $ctr_child
run_podman container rm -f -t0 $ctr_parent
ls -l /run/netns | sed -e 's/^/# after container rm /' >&3
}

@test "podman auto-update - label io.containers.autoupdate=image with rollback" {
Expand Down
7 changes: 7 additions & 0 deletions test/system/260-sdnotify.bats
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,15 @@ none | false | false | 0
export NOTIFY_SOCKET=$PODMAN_TMPDIR/notify-$(safename).sock
_start_socat

echo "ls -l $PODMAN_TMPDIR/"
ls -l $PODMAN_TMPDIR/

run_podman push $registry_flags $IMAGE $image_on_local_registry

# Again
echo "ls -l $PODMAN_TMPDIR/"
ls -l $PODMAN_TMPDIR/

run_podman pull $registry_flags $image_on_local_registry
is "${lines[1]}" "Pulling image //$image_on_local_registry inside systemd: setting pull timeout to 5m0s" "NOTIFY_SOCKET is passed to container"

Expand Down
12 changes: 12 additions & 0 deletions test/system/331-system-check.bats
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,21 @@
# that they are caught and remedied, even if it requires discarding some
# data in read-write layers.
#
# DO NOT PARALLELIZE. All of these tests require complete control of images.
#

load helpers

# bats per-file setup hook: wipe all existing podman objects so the tests
# in this file start from an empty store, then call _prefetch for $IMAGE.
function setup_file() {
# Pristine setup: no pods, containers, volumes, images
# NOTE(review): the order looks deliberate -- pods and containers are
# removed first, presumably so nothing still references the volumes
# (removed without -f) and images that follow. Confirm before reordering.
run_podman pod rm -a -f
run_podman rm -f -a -t0
run_podman volume rm -a
run_podman image rm -f -a

# NOTE(review): presumably makes $IMAGE available again after the
# 'image rm' above -- confirm against the _prefetch helper.
_prefetch $IMAGE
}

@test "podman system check - unmanaged layers" {
run_podman_testing create-storage-layer
layerID="$output"
Expand Down
9 changes: 5 additions & 4 deletions test/system/700-play.bats
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ RELABEL="system_u:object_r:container_file_t:s0"
# Run `play kube` in the background as it will wait for the service
# container to exit.
timeout --foreground -v --kill=10 60 \
$PODMAN play kube --service-container=true --log-driver journald $TESTYAML &>/dev/null &
$PODMAN --syslog play kube --service-container=true --log-driver journald $TESTYAML &>/dev/null &

# Wait for the container to be running
container_a=$PODCTRNAME
Expand Down Expand Up @@ -200,7 +200,7 @@ RELABEL="system_u:object_r:container_file_t:s0"
is "$output" "true"

# Restart the pod, make sure the service is running again
run_podman pod restart $PODNAME
run_podman --syslog pod restart $PODNAME
run_podman container inspect $service_container --format "{{.State.Running}}"
is "$output" "true"

Expand All @@ -211,13 +211,13 @@ RELABEL="system_u:object_r:container_file_t:s0"
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"

# Kill the pod and make sure the service is not running
run_podman pod kill $PODNAME
run_podman --syslog pod kill $PODNAME
_ensure_container_running $service_container false

run_podman network ls

# Remove the pod and make sure the service is removed along with it
run_podman pod rm $PODNAME
run_podman --syslog pod rm $PODNAME
run_podman 1 container exists $service_container
}

Expand Down Expand Up @@ -693,6 +693,7 @@ spec:
if [[ -n "$PARALLEL_JOBSLOT" ]]; then
expect=$((expect + 4))
fi
# FIXME: under high load, delta_t can be 12
assert $delta_t -le $expect \
"podman kube play did not get killed within $expect seconds"
# Make sure we actually got SIGTERM and podman printed its message.
Expand Down
2 changes: 2 additions & 0 deletions test/system/setup_suite.bash
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ function teardown_suite() {
stop_registry
local exit_code=$?

run_podman '?' rmi $(pause_image)

# At end, if all tests have passed, check for leaks.
# Don't do this if there were errors: failing tests may not clean up.
if [[ -e "$BATS_SUITE_TMPDIR/all-tests-passed" ]]; then
Expand Down
Loading