Skip to content

Commit

Permalink
Merge pull request #363 from jingyuanliang/wdover
Browse files Browse the repository at this point in the history
Fix cilium watchdog to write CNI config at least once
  • Loading branch information
google-oss-prow[bot] authored Sep 27, 2024
2 parents 2007892 + 0b69c09 commit 3315513
Show file tree
Hide file tree
Showing 6 changed files with 373 additions and 2 deletions.
10 changes: 8 additions & 2 deletions scripts/install-cni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -475,17 +475,23 @@ cilium_watchdog_success_wait=${CILIUM_WATCHDOG_SUCCESS_WAIT:-300}
# Retry budget, in seconds, handed to cilium_health_check on every watchdog
# iteration; overridable via CILIUM_WATCHDOG_FAILURE_RETRY, defaults to 60.
cilium_watchdog_failure_retry=${CILIUM_WATCHDOG_FAILURE_RETRY:-60}
# Pause, in seconds, after the upfront fast-start write before health checks
# begin; overridable via CILIUM_WATCHDOG_FAST_START_WAIT, defaults to 60.
cilium_watchdog_fast_start_wait=${CILIUM_WATCHDOG_FAST_START_WAIT:-60}

# Records whether this process has written the CNI config at least once. The
# watchdog loop consults it so that a file which already exists on disk is
# still rewritten the first time, rather than being trusted as-is.
file_written=false

# Fast-start mode (CILIUM_FAST_START_NAMESPACES set and non-empty): write the
# CNI config immediately, without waiting for a health check, then wait before
# entering the health-check loop.
if [[ -n "${CILIUM_FAST_START_NAMESPACES:-}" ]]; then
log "Cilium has fast-start; writing CNI config upfront then wait for ${cilium_watchdog_fast_start_wait}s and start to check Cilium health."
write_file "${output_file}" "${cni_spec}"
file_written=true
# The trailing "s" is a literal seconds suffix passed to sleep.
sleep "${cilium_watchdog_fast_start_wait}"s
fi

while true; do
log "Checking Cilium health allowing retries for up to ${cilium_watchdog_failure_retry}s."
if cilium_health_check "${cilium_watchdog_failure_retry}"; then
log "Cilium healthz reported success; writing CNI config if not already there then wait for ${cilium_watchdog_success_wait}s."
[[ ! -f "${output_file}" ]] && write_file "${output_file}" "${cni_spec}"
log "Cilium healthz reported success; writing CNI config if never written or not already there then wait for ${cilium_watchdog_success_wait}s."
if [[ ${file_written} != "true" ]] || [[ ! -f "${output_file}" ]]; then
write_file "${output_file}" "${cni_spec}"
file_written=true
fi
sleep "${cilium_watchdog_success_wait}"s
else
log "Cilium does not appear healthy; removing CNI config if it exists then wait for 2s before retry."
Expand Down
66 changes: 66 additions & 0 deletions scripts/testcase/testcase-basic-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Environment for the basic overwrite scenario.
# The host/port pair matches the API server URL answered by the curl stub
# defined in before_test.
export \
  KUBERNETES_SERVICE_HOST='kubernetes.default.svc' \
  KUBERNETES_SERVICE_PORT='443'

# Every optional feature toggle is off in this scenario.
export \
  ENABLE_CALICO_NETWORK_POLICY='false' \
  ENABLE_CILIUM_PLUGIN='false' \
  ENABLE_MASQUERADE='false' \
  ENABLE_IPV6='false'

# Assign first, export second, so the status of the read is not hidden
# behind the export builtin.
CNI_SPEC_TEMPLATE="$(cat testdata/spec-template.json)"
export CNI_SPEC_TEMPLATE

#######################################
# Fixture setup (presumably invoked by the test harness before the script
# under test runs): installs a canned-response replacement for curl and
# seeds the CNI config path with a sentinel value.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
#######################################
before_test() {

  # Replacement for curl: the reply is chosen by matching a URL substring
  # against all arguments joined into one string.
  # shellcheck disable=SC2329
  curl() {
    # shellcheck disable=SC2317
    case "$*" in
      *http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
        printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
        ;;
      *https://kubernetes.default.svc:443/api/v1/nodes*)
        printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
        ;;
      *)
        # Any request without a canned reply is unsupported.
        exit 1
        ;;
    esac
  }
  export -f curl

  # Sentinel content, so a later comparison can tell whether the config file
  # was overwritten.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

#######################################
# Checks that the CNI config on disk equals the expected golden file. Both
# sides are passed through `jq -S .` before they are compared as strings.
# A side that cannot be read or parsed is reported as a failure; without
# that check two failed reads would both yield an empty string and compare
# as equal.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
# Outputs:
#   Diagnostics on stdout when the check fails.
# Returns:
#   0 when both documents match, 1 otherwise.
#######################################
function verify() {
  local expected
  local actual

  # Declaration and assignment are separate so the status of jq (or of the
  # input redirection) is what the `if` sees.
  if ! expected=$(jq -S . <"testdata/expected-basic.json"); then
    echo "Could not read or parse testdata/expected-basic.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Could not read or parse /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "$expected"
    echo "but actual was"
    echo "$actual"
    return 1
  fi

}
71 changes: 71 additions & 0 deletions scripts/testcase/testcase-cilium-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Environment for the Cilium overwrite scenario.
# The host/port pair matches the API server URL answered by the curl stub
# defined in before_test.
export \
  KUBERNETES_SERVICE_HOST='kubernetes.default.svc' \
  KUBERNETES_SERVICE_PORT='443'

# Cilium plugin on; the healthz port matches the localhost URL the curl stub
# answers. Fast-start namespaces are exported but left empty.
export \
  ENABLE_CALICO_NETWORK_POLICY='false' \
  ENABLE_CILIUM_PLUGIN='true' \
  CILIUM_HEALTHZ_PORT='63197' \
  CILIUM_FAST_START_NAMESPACES='' \
  ENABLE_MASQUERADE='false' \
  ENABLE_IPV6='false'

# Assign first, export second, so the status of the read is not hidden
# behind the export builtin.
CNI_SPEC_TEMPLATE="$(cat testdata/spec-template.json)"
export CNI_SPEC_TEMPLATE

#######################################
# Fixture setup (presumably invoked by the test harness before the script
# under test runs): installs a canned-response replacement for curl,
# including a reply for the local health endpoint, and seeds the CNI config
# path with a sentinel value.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
#######################################
before_test() {

  # Replacement for curl: the reply is chosen by matching a URL substring
  # against all arguments joined into one string.
  # shellcheck disable=SC2329
  curl() {
    # shellcheck disable=SC2317
    case "$*" in
      *http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
        printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
        ;;
      *https://kubernetes.default.svc:443/api/v1/nodes*)
        printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
        ;;
      *http://localhost:63197/*)
        # Health endpoint answers successfully.
        printf '%s\n' 'healthz'
        ;;
      *)
        # Any request without a canned reply is unsupported.
        exit 1
        ;;
    esac
  }
  export -f curl

  # Sentinel content, so a later comparison can tell whether the config file
  # was overwritten.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

#######################################
# Checks that the CNI config on disk equals the expected golden file. Both
# sides are passed through `jq -S .` before they are compared as strings.
# A side that cannot be read or parsed is reported as a failure; without
# that check two failed reads would both yield an empty string and compare
# as equal.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
# Outputs:
#   Diagnostics on stdout when the check fails.
# Returns:
#   0 when both documents match, 1 otherwise.
#######################################
function verify() {
  local expected
  local actual

  # Declaration and assignment are separate so the status of jq (or of the
  # input redirection) is what the `if` sees.
  if ! expected=$(jq -S . <"testdata/expected-cilium.json"); then
    echo "Could not read or parse testdata/expected-cilium.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Could not read or parse /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "$expected"
    echo "but actual was"
    echo "$actual"
    return 1
  fi

}
83 changes: 83 additions & 0 deletions scripts/testcase/testcase-watchdog-cilium-faststart-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Environment for the watchdog + Cilium fast-start overwrite scenario.
# The host/port pair matches the API server URL answered by the curl stub
# defined in before_test.
export \
  KUBERNETES_SERVICE_HOST='kubernetes.default.svc' \
  KUBERNETES_SERVICE_PORT='443'

# Cilium plugin on, fast-start namespaces non-empty, watchdog on.
export \
  ENABLE_CALICO_NETWORK_POLICY='false' \
  ENABLE_CILIUM_PLUGIN='true' \
  CILIUM_HEALTHZ_PORT='63197' \
  CILIUM_FAST_START_NAMESPACES='default,kube-system' \
  ENABLE_MASQUERADE='false' \
  ENABLE_IPV6='false' \
  RUN_CNI_WATCHDOG='true'

# Assign first, export second, so the status of the read is not hidden
# behind the export builtin.
CNI_SPEC_TEMPLATE="$(cat testdata/spec-template.json)"
export CNI_SPEC_TEMPLATE

# Exit code the curl stub uses when the health endpoint is queried; exported
# because the stub itself is exported with `export -f`.
export TEST_WANT_EXIT_CODE='24'

#######################################
# Fixture setup (presumably invoked by the test harness before the script
# under test runs): installs replacements for curl and sleep, and seeds the
# CNI config path with a sentinel value.
# Globals:
#   CNI_SPEC_NAME (read)       - file name of the config under
#                                /host/etc/cni/net.d
#   TEST_WANT_EXIT_CODE (read) - exit code used by the curl replacement when
#                                the health endpoint is queried
#######################################
before_test() {

  # Replacement for curl: the reply is chosen by matching a URL substring
  # against all arguments joined into one string.
  # shellcheck disable=SC2329
  curl() {
    # shellcheck disable=SC2317
    case "$*" in
      *http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
        printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
        ;;
      *https://kubernetes.default.svc:443/api/v1/nodes*)
        printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
        ;;
      *http://localhost:63197/*)
        # With fast-start enabled, CNI config should have been written
        # at the first Cilium health check attempt, so the run is ended
        # here with the code the test expects.
        exit "${TEST_WANT_EXIT_CODE}"
        ;;
      *)
        # Any request without a canned reply is unsupported.
        exit 1
        ;;
    esac
  }
  export -f curl

  # Replacement for sleep: reports the call instead of waiting.
  # shellcheck disable=SC2317,SC2329
  sleep() {
    printf '%s\n' \
      "[MOCK called] sleep $*" \
      "[MOCK] this test expects a delay during fast start."
  }
  export -f sleep

  # Sentinel content, so a later comparison can tell whether the config file
  # was overwritten.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

#######################################
# Checks that the CNI config on disk equals the expected golden file. Both
# sides are passed through `jq -S .` before they are compared as strings.
# A side that cannot be read or parsed is reported as a failure; without
# that check two failed reads would both yield an empty string and compare
# as equal.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
# Outputs:
#   Diagnostics on stdout when the check fails.
# Returns:
#   0 when both documents match, 1 otherwise.
#######################################
function verify() {
  local expected
  local actual

  # Declaration and assignment are separate so the status of jq (or of the
  # input redirection) is what the `if` sees.
  if ! expected=$(jq -S . <"testdata/expected-cilium-faststart.json"); then
    echo "Could not read or parse testdata/expected-cilium-faststart.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Could not read or parse /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "$expected"
    echo "but actual was"
    echo "$actual"
    return 1
  fi

}
75 changes: 75 additions & 0 deletions scripts/testcase/testcase-watchdog-cilium-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Environment for the watchdog + Cilium overwrite scenario.
# The host/port pair matches the API server URL answered by the curl stub
# defined in before_test.
export \
  KUBERNETES_SERVICE_HOST='kubernetes.default.svc' \
  KUBERNETES_SERVICE_PORT='443'

# Cilium plugin on, fast-start namespaces exported but empty, watchdog on.
export \
  ENABLE_CALICO_NETWORK_POLICY='false' \
  ENABLE_CILIUM_PLUGIN='true' \
  CILIUM_HEALTHZ_PORT='63197' \
  CILIUM_FAST_START_NAMESPACES='' \
  ENABLE_MASQUERADE='false' \
  ENABLE_IPV6='false' \
  RUN_CNI_WATCHDOG='true'

# Assign first, export second, so the status of the read is not hidden
# behind the export builtin.
CNI_SPEC_TEMPLATE="$(cat testdata/spec-template.json)"
export CNI_SPEC_TEMPLATE

# Expected exit code is taken from TEST_EXIT_CODE_SLEEP, which is defined
# outside this file. Not referenced again here, hence the SC2034 suppression.
# shellcheck disable=SC2034
TEST_WANT_EXIT_CODE="${TEST_EXIT_CODE_SLEEP}"

#######################################
# Fixture setup (presumably invoked by the test harness before the script
# under test runs): installs a canned-response replacement for curl,
# including a reply for the local health endpoint, and seeds the CNI config
# path with a sentinel value.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
#######################################
before_test() {

  # Replacement for curl: the reply is chosen by matching a URL substring
  # against all arguments joined into one string.
  # shellcheck disable=SC2329
  curl() {
    # shellcheck disable=SC2317
    case "$*" in
      *http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
        printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
        ;;
      *https://kubernetes.default.svc:443/api/v1/nodes*)
        printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
        ;;
      *http://localhost:63197/*)
        # Health endpoint answers successfully.
        printf '%s\n' 'healthz'
        ;;
      *)
        # Any request without a canned reply is unsupported.
        exit 1
        ;;
    esac
  }
  export -f curl

  # Sentinel content, so a later comparison can tell whether the config file
  # was overwritten.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

#######################################
# Checks that the CNI config on disk equals the expected golden file. Both
# sides are passed through `jq -S .` before they are compared as strings.
# A side that cannot be read or parsed is reported as a failure; without
# that check two failed reads would both yield an empty string and compare
# as equal.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
# Outputs:
#   Diagnostics on stdout when the check fails.
# Returns:
#   0 when both documents match, 1 otherwise.
#######################################
function verify() {
  local expected
  local actual

  # Declaration and assignment are separate so the status of jq (or of the
  # input redirection) is what the `if` sees.
  if ! expected=$(jq -S . <"testdata/expected-cilium.json"); then
    echo "Could not read or parse testdata/expected-cilium.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Could not read or parse /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "$expected"
    echo "but actual was"
    echo "$actual"
    return 1
  fi

}
70 changes: 70 additions & 0 deletions scripts/testcase/testcase-watchdog-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Environment for the watchdog overwrite scenario without Cilium.
# The host/port pair matches the API server URL answered by the curl stub
# defined in before_test.
export \
  KUBERNETES_SERVICE_HOST='kubernetes.default.svc' \
  KUBERNETES_SERVICE_PORT='443'

# Optional feature toggles off; only the watchdog is switched on.
export \
  ENABLE_CALICO_NETWORK_POLICY='false' \
  ENABLE_CILIUM_PLUGIN='false' \
  ENABLE_MASQUERADE='false' \
  ENABLE_IPV6='false' \
  RUN_CNI_WATCHDOG='true'

# Assign first, export second, so the status of the read is not hidden
# behind the export builtin.
CNI_SPEC_TEMPLATE="$(cat testdata/spec-template.json)"
export CNI_SPEC_TEMPLATE

# Expected exit code is taken from TEST_EXIT_CODE_SLEEP, which is defined
# outside this file. Not referenced again here, hence the SC2034 suppression.
# shellcheck disable=SC2034
TEST_WANT_EXIT_CODE="${TEST_EXIT_CODE_SLEEP}"

#######################################
# Fixture setup (presumably invoked by the test harness before the script
# under test runs): installs a canned-response replacement for curl and
# seeds the CNI config path with a sentinel value.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
#######################################
before_test() {

  # Replacement for curl: the reply is chosen by matching a URL substring
  # against all arguments joined into one string.
  # shellcheck disable=SC2329
  curl() {
    # shellcheck disable=SC2317
    case "$*" in
      *http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
        printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
        ;;
      *https://kubernetes.default.svc:443/api/v1/nodes*)
        printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
        ;;
      *)
        # Any request without a canned reply is unsupported.
        exit 1
        ;;
    esac
  }
  export -f curl

  # Sentinel content, so a later comparison can tell whether the config file
  # was overwritten.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

#######################################
# Checks that the CNI config on disk equals the expected golden file. Both
# sides are passed through `jq -S .` before they are compared as strings.
# A side that cannot be read or parsed is reported as a failure; without
# that check two failed reads would both yield an empty string and compare
# as equal.
# Globals:
#   CNI_SPEC_NAME (read) - file name of the config under /host/etc/cni/net.d
# Outputs:
#   Diagnostics on stdout when the check fails.
# Returns:
#   0 when both documents match, 1 otherwise.
#######################################
function verify() {
  local expected
  local actual

  # Declaration and assignment are separate so the status of jq (or of the
  # input redirection) is what the `if` sees.
  if ! expected=$(jq -S . <"testdata/expected-basic.json"); then
    echo "Could not read or parse testdata/expected-basic.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Could not read or parse /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "$expected"
    echo "but actual was"
    echo "$actual"
    return 1
  fi

}

0 comments on commit 3315513

Please sign in to comment.