From c376c51479da60a24fe2f047e59fbdc6b517b3d2 Mon Sep 17 00:00:00 2001
From: kaysond
Date: Thu, 9 Dec 2021 22:10:47 -0800
Subject: [PATCH] actually fix race conditions in testing

Replace the sequential wait loops in setup_file with a single polling
loop. Every 5 seconds it re-checks all readiness conditions (images
built, containers started, trafficjam tasks reporting load balancer
IPs, rules added on each running task, whoami services running) and
collects diagnostics for whichever checks are still failing.

The loop gives up after LIMIT unsuccessful polls (36 x 5s = 3min) and
prints the collected diagnostics to stderr, one entry per line.

Stack deployment moves out of setup_file into the "Deploy the swarm
environment" test in test.bats. Several ping checks are replaced by
curl requests and the duplicated ping assertions are dropped.
---
 test/test-dind-swarm.bats | 109 +++++++++++++++++--------------------
 test/test.bats            |   4 +-
 2 files changed, 52 insertions(+), 61 deletions(-)

diff --git a/test/test-dind-swarm.bats b/test/test-dind-swarm.bats
index 1f7ceef..f680073 100644
--- a/test/test-dind-swarm.bats
+++ b/test/test-dind-swarm.bats
@@ -1,60 +1,62 @@
 setup_file() {
-	#Only run these on the manager
+	#Only run this on the manager
 	if docker node ls &> /dev/null; then
-		#Wait for images to finish building on container startup for 45s
-		while ! docker image ls | grep -q whoami; do
-			if (( ++i > 12 )); then
-				echo "Timed out waiting for images to build" >&2
-				docker image ls >&2
-				exit 1
-			fi
+		#Wait up to 3min for test swarm to reach desired state
+		READY=""
+		i=0
+		LIMIT=36
+		while [[ -z "$READY" ]]; do
 			sleep 5
-		done
+			READY="true"
+			ERRORS=()
 
-		docker stack deploy -c /opt/trafficjam/test/docker-compose-dind-swarm.yml test
+			#Images are built
+			if ! docker image ls |& grep -q whoami; then
+				READY=""
+				ERRORS=("${ERRORS[@]}" "Images aren't built" "$(docker image ls)")
+			fi
 
-		#Wait for containers to startup for 60s
-		i=0
-		while [[ "$(docker ps | wc -l)" != "7" ]]; do
-			if (( ++i > 12 )); then
-				echo Timed out waiting for container startup >&2
-				docker ps >&2
-				exit 1
+			#All containers are started
+			if [[ "$(docker ps 2> /dev/null | wc -l)" != "7" ]]; then
+				READY=""
+				ERRORS=("${ERRORS[@]}" "Containers aren't started" "$(docker ps)")
 			fi
-			sleep 5
-		done
 
-		#Wait for load balancer ips to get reported for 120s
-		i=0
-		while ! docker inspect --format '{{ .Config.Env }}' $(docker ps --quiet --filter 'name=trafficjam_FDB2E498') | grep -q -E "LOAD_BALANCER_IPS=172\.23\.0\.[[:digit:]]{1,3} 172\.23\.0\.[[:digit:]]{1,3}"; do
-			if (( ++i > 24 )); then
-				echo Timed out waiting for load balancer IPs to be reported >&2
-				docker inspect --format '{{ .Config.Env }}' $(docker ps --quiet --filter 'name=trafficjam_FDB2E498') >&2
-				exit 1
+			if docker service ls | grep -q trafficjam_FDB2E498; then
+				#Two trafficjam tasks exist with LOAD_BALANCER_IPS env vars set
+				if [[ "$(docker inspect --format '{{ .Spec.ContainerSpec.Env }}' $(docker service ps --quiet --filter desired-state=running trafficjam_FDB2E498) | \
+					grep -cE 'LOAD_BALANCER_IPS=172\.23\.0\.[[:digit:]]{1,3} 172\.23\.0\.[[:digit:]]{1,3}')" != "2" ]]; then
+					READY=""
+					ERRORS=("${ERRORS[@]}" "trafficjam tasks aren't ready" "$(docker inspect --format '{{ .Spec.ContainerSpec.Env }}' $(docker service ps --quiet --filter desired-state=running trafficjam_FDB2E498))")
+				fi
+
+				#All rules are added on both running trafficjam tasks
+				for TASKID in $(docker service ps trafficjam_FDB2E498 | grep Running | cut -d' ' -f1); do
+					if [[ "$(docker service logs trafficjam_FDB2E498 | grep "$TASKID" | awk -F']' '{ print $2 }' | grep -v Whitelisted | tail -n 6 | grep -c 'DEBUG: Error Count: 0')" != "2" ]]; then
+						READY=""
+						ERRORS=("${ERRORS[@]}" "rules are not added on task $TASKID" "$(docker logs $(docker ps --quiet --filter 'name=trafficjam_FDB2E498') | awk -F']' '{ print $2 }' | grep -v Whitelisted | tail -n 6)")
+					fi
+				done
+			else
+				READY=""
+				ERRORS=("${ERRORS[@]}" "trafficjam service doesn't exist" "$(docker service ls)")
 			fi
-			sleep 5
-		done
 
-		#Wait for all rules to get added (causing log entries to repeat) for 120s
-		i=0
-		while [[ "$(docker logs $(docker ps --quiet --filter 'name=trafficjam_FDB2E498') | awk -F']' '{ print $2 }' | grep -v Whitelisted | tail -n 6 | grep -c 'DEBUG: Error Count: 0')" != "2" ]]; do
-			if (( ++i > 24 )); then
-				echo Timed out waiting for rules to be added >&2
-				docker logs $(docker ps --quiet --filter 'name=trafficjam_FDB2E498') | awk -F']' '{ print $2 }' | grep -v Whitelisted | tail -n 6 >&2
-				exit 1
+			#All whoami services are running
+			if [[ "$(docker inspect --format '{{ .Status.State }}' $(docker service ps -q test_public1 | head -n1))" != "running" || \
+				"$(docker inspect --format '{{ .Status.State }}' $(docker service ps -q test_public2 | head -n1))" != "running" || \
+				"$(docker inspect --format '{{ .Status.State }}' $(docker service ps -q test_private1 | head -n1))" != "running" ]]; then
+				READY=""
+				ERRORS=("${ERRORS[@]}" "whoami services aren't ready" "$(docker service ls)" "$(docker service ps test_public1)" "$(docker service ps test_public2)" "$(docker service ps test_private1)" )
 			fi
-			sleep 5
-		done
 
-		#Wait for all whoami services to startup for 120s
-		i=0
-		while [[ "$(docker service ls | grep whoami | awk '{ print $4 }')" != "$(printf '2/2\n2/2\n2/2\n')" ]]; do
-			if (( ++i > 24 )); then
-				echo Timed out waiting for services to settle >&2
-				docker service ls | grep whoami
+			if [[ -z "$READY" ]] && (( ++i >= LIMIT )); then
+				echo "Timed out waiting for swarm state to converge" >&2
+				IFS=$'\n'
+				printf '%s\n' "${ERRORS[*]}" >&2
+				IFS=$' \t\n'
 				exit 1
 			fi
-			sleep 5
 		done
 	fi
 	export RP_ID=$(docker ps --quiet --filter 'name=test_reverseproxy')
@@ -67,14 +69,8 @@ setup_file() {
 
 @test "whitelisted containers can communicate with all other containers on the specified network" {
 	#Each is run twice to hit both nodes
-	docker exec "$RP_ID" ping -c 2 -w 10 test_public1
-	docker exec "$RP_ID" ping -c 2 -w 10 test_public1
-
+	docker exec "$RP_ID" curl --verbose --max-time 5 test_public1:8000 || { docker service logs trafficjam_FDB2E498; docker service logs test_public1; exit 1; }
 	docker exec "$RP_ID" curl --verbose --max-time 5 test_public1:8000
-	docker exec "$RP_ID" curl --verbose --max-time 5 test_public1:8000
-
-	docker exec "$RP_ID" ping -c 2 -w 10 test_public2
-	docker exec "$RP_ID" ping -c 2 -w 10 test_public2
 	docker exec "$RP_ID" curl --verbose --max-time 5 test_public2:8000
 	docker exec "$RP_ID" curl --verbose --max-time 5 test_public2:8000
 
@@ -83,8 +79,6 @@ setup_file() {
 @test "containers on the specified network can not communicate with one another" {
 	run docker exec "$TPU1_ID" ping -c 2 -w 10 test_public2
 	[ "$status" -eq 1 ]
-	run docker exec "$TPU1_ID" ping -c 2 -w 10 test_public2
-	[ "$status" -eq 1 ]
 
 	run docker exec "$TPU1_ID" curl --verbose --max-time 5 test_public2:8000
 	[ "$status" -eq 7 -o "$status" -eq 28 ]
@@ -95,8 +89,6 @@ setup_file() {
 @test "containers on the specified network can not communicate with one another (opposite direction)" {
 	run docker exec "$TPU2_ID" ping -c 2 -w 10 test_public1
 	[ "$status" -eq 1 ]
-	run docker exec "$TPU2_ID" ping -c 2 -w 10 test_public1
-	[ "$status" -eq 1 ]
 
 	run docker exec "$TPU2_ID" curl --verbose --max-time 5 test_public1:8000
 	[ "$status" -eq 7 -o "$status" -eq 28 ]
@@ -113,11 +105,8 @@ setup_file() {
 }
 
 @test "containers on non-specified networks can communicate" {
-	docker exec "$TPR1_ID" ping -c 2 -w 10 test_reverseproxy
-	docker exec "$TPR1_ID" ping -c 2 -w 10 test_reverseproxy
-
-	docker exec "$RP_ID" ping -c 2 -w 10 test_private1
-	docker exec "$RP_ID" ping -c 2 -w 10 test_private1
+	docker exec "$TPR1_ID" curl --verbose --max-time 5 test_reverseproxy
+	docker exec "$TPR1_ID" curl --verbose --max-time 5 test_reverseproxy
 
 	docker exec "$RP_ID" curl --verbose --max-time 5 test_private1:8000
 	docker exec "$RP_ID" curl --verbose --max-time 5 test_private1:8000
diff --git a/test/test.bats b/test/test.bats
index ccad09d..189b24d 100644
--- a/test/test.bats
+++ b/test/test.bats
@@ -29,9 +29,11 @@
 
 @test "Deploy the swarm environment" {
 	docker-compose --file "$BATS_TEST_DIRNAME"/docker-compose-swarm.yml --project-name trafficjam_test_swarm up --detach
-	sleep 5 #Wait for the daemons to start
+	sleep 5
 	docker exec swarm-manager docker swarm init
 	docker exec swarm-worker $(docker exec swarm-manager docker swarm join-token worker | grep "join --token")
+	sleep 5
+	docker exec swarm-manager docker stack deploy -c /opt/trafficjam/test/docker-compose-dind-swarm.yml test
 }
 
 @test "Test the swarm manager" {