Skip to content

Commit

Permalink
netd-init tweaks for performance
Browse files Browse the repository at this point in the history
  • Loading branch information
jingyuanliang committed May 21, 2024
1 parent cdfecbb commit 6c9f963
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 60 deletions.
4 changes: 3 additions & 1 deletion scripts/build-toybox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ set -exu
# we're using it for display only so we should be good.
# Toybox sh has more TODOs in multiple areas so don't use it.
# Command mkdir is used for tests only.
toys="base64 mkdir mktemp mv route sort timeout"
# Commands nice and renice are not used directly, but are included
# for the convenience of the caller (the k8s manifest).
toys="base64 mkdir mktemp mv nice renice route sort timeout"

cd /toybox-*/

Expand Down
122 changes: 63 additions & 59 deletions scripts/install-cni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.

#######################################
# Write one log line to stdout.
# Arguments: the message; multiple arguments are joined with single spaces.
# Outputs:   the joined message followed by a newline, on stdout.
# Returns:   the exit status of printf (0 unless stdout cannot be written).
#######################################
log() {
  # Pin IFS locally so "$*" always joins with a single space, regardless of
  # what the caller has set IFS to.
  local IFS=' '
  # printf instead of echo: echo would treat a leading -n / -e / -E in the
  # message as an option and drop it (or change how the rest is printed).
  printf '%s\n' "$*"
}

# shellcheck disable=SC2317,SC2329 # when called with $1=calico_ready
calico_ready() {
# Readiness check for Calico's CNI configuration: prints what is being
# checked, then lists paths matching /host/etc/cni/net.d/*calico*.conflist.
# Returns the exit status of `compgen -G`, which is zero only when the glob
# matches at least one path.
# NOTE(review): each message below is emitted twice, once with echo and once
# with log; this looks like the old and new side of a change shown together.
# Confirm which pair is meant to remain.
echo "Listing items matching /host/etc/cni/net.d/*calico*.conflist"
echo "(this action repeats during bootstrap until a match is found)"
log "Listing items matching /host/etc/cni/net.d/*calico*.conflist"
log "(this action repeats during bootstrap until a match is found)"
# The command producing exit status must be the last command here.
compgen -G "/host/etc/cni/net.d/*calico*.conflist"
}

# shellcheck disable=SC2317,SC2329 # when called with $1=cni_ready
cni_ready() {
local -r cni_bin="$1"
echo "Running '/host/home/kubernetes/bin/${cni_bin}' with CNI_COMMAND=VERSION"
log "Running '/host/home/kubernetes/bin/${cni_bin}' with CNI_COMMAND=VERSION"
# It's necessary to try running it instead of just checking for its existence,
# because the CNI installer might not write atomically (i.e. write to a
# temporary file, then move it into place).
echo "(errors are expected during bootstrap; will retry until success)"
log "(errors are expected during bootstrap; will retry until success)"
# The command producing exit status must be the last command here.
# Send errors to stdout since they're "expected" errors.
# This redirection doesn't affect exit status after execution.
Expand All @@ -45,9 +49,7 @@ if [[ -n "$1" ]]; then
exit
fi

BUILD='__BUILD__'

echo "Install-CNI ($0), Build: $BUILD"
log "Install-CNI ($0), Build: __BUILD__"

set -u -e

Expand All @@ -56,13 +58,13 @@ set -u -e
# All exit paths should call `success` instead of `exit 0`,
# so the appropriate actions can be taken according to RUN_CNI_WATCHDOG.
success() {
echo "Install-CNI execution completed successfully."
log "Install-CNI execution completed successfully."
if [[ "${RUN_CNI_WATCHDOG:-}" != "true" ]]; then
echo "Not running CNI watchdog; exiting now."
log "Not running CNI watchdog; exiting now."
exit 0
fi
while true; do
echo "Running CNI watchdog; sleeping infinity now."
log "Running CNI watchdog; sleeping infinity now."
sleep infinity
done
# In case of anything unexpected, signal failure.
Expand All @@ -74,25 +76,25 @@ success() {
#
# If this script is being run in order to generate the Calico config file, then skip this
# check.
echo "Calico network policy enabled: '${ENABLE_CALICO_NETWORK_POLICY:-}'; write config: '${WRITE_CALICO_CONFIG_FILE:-}'"
log "Calico network policy enabled: '${ENABLE_CALICO_NETWORK_POLICY:-}'; write config: '${WRITE_CALICO_CONFIG_FILE:-}'"
if [[ "${ENABLE_CALICO_NETWORK_POLICY:-}" == "true" && "${WRITE_CALICO_CONFIG_FILE:-}" != "true" ]]; then
# inotify calls back to the beginning of this script.
# `timeout` exits with a failure status when the time limit is reached, but
# this is an expected situation when Calico is being disabled (see below).
if timeout 120s inotify /host/etc/cni/net.d '' "$0" calico_ready; then
echo "Calico has written CNI config files. No action needed here."
log "Calico has written CNI config files. No action needed here."
success
else
echo "inotify for Calico CNI configuration files failed or timed out (status: $?)."
log "inotify for Calico CNI configuration files failed or timed out (status: $?)."
# This handles the disabling process: https://github.com/GoogleCloudPlatform/netd/issues/91
ENABLE_CALICO_NETWORK_POLICY=false
echo "Update calico network policy config to ${ENABLE_CALICO_NETWORK_POLICY}"
log "Update calico network policy config to ${ENABLE_CALICO_NETWORK_POLICY}"
fi
fi

cni_spec=${CALICO_CNI_SPEC_TEMPLATE:-${CNI_SPEC_TEMPLATE:-}}
if [[ -z "${cni_spec}" ]]; then
echo "No CNI spec template or empty template is specified. Not taking actions."
log "No CNI spec template or empty template is specified. Not taking actions."
success
fi

Expand All @@ -102,13 +104,13 @@ else
cni_spec=${cni_spec//@cniType/ptp}
fi

if [ "${ENABLE_BANDWIDTH_PLUGIN}" == "true" ] && [ -f "/host/home/kubernetes/bin/bandwidth" ]; then
if [ "${ENABLE_BANDWIDTH_PLUGIN:-}" == "true" ] && [ -f "/host/home/kubernetes/bin/bandwidth" ]; then
cni_spec=${cni_spec//@cniBandwidthPlugin/, {\"type\": \"bandwidth\", \"capabilities\": {\"bandwidth\": true\}\}}
else
cni_spec=${cni_spec//@cniBandwidthPlugin/}
fi

token=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)
token=$(</var/run/secrets/kubernetes.io/serviceaccount/token)
host=${KUBERNETES_SERVICE_HOST}
# If host contains a colon (:), it is an IPv6 address, hence needs wrapping
# with [..].
Expand All @@ -120,7 +122,7 @@ response=$(curl -k -s -H "Authorization: Bearer $token" "$node_url")

if [ "${MIGRATE_TO_DPV2:-}" == "true" ]; then
DPV2_MIGRATION_READY=$(jq '.metadata.labels."cloud.google.com/gke-dpv2-migration-ready"' <<<"$response")
echo "Migration to DPv2 in progress; node ready: '${DPV2_MIGRATION_READY}'"
log "Migration to DPv2 in progress; node ready: '${DPV2_MIGRATION_READY}'"
if [ "${DPV2_MIGRATION_READY}" != '"true"' ] # DPV2_MIGRATION_READY is a JSON string thus double quotes
then
ENABLE_CILIUM_PLUGIN=false
Expand All @@ -132,19 +134,19 @@ if [[ "${ENABLE_CILIUM_PLUGIN}" == "true" ]]; then
if [[ -n "${CILIUM_FAST_START_NAMESPACES:-}" ]]; then
cilium_cni_config=$(jq --arg namespaces "${CILIUM_FAST_START_NAMESPACES:-}" '.["dpv2-fast-start-namespaces"] = $namespaces' <<<"${cilium_cni_config}")
fi
echo "Adding Cilium plug-in to the CNI config: '$(jq -c . <<<"${cilium_cni_config}")'"
log "Adding Cilium plug-in to the CNI config: ${cilium_cni_config//$'\n'/ }"
cni_spec=${cni_spec//@cniCiliumPlugin/, ${cilium_cni_config}}
else
echo "Not using Cilium plug-in."
log "Not using Cilium plug-in."
cni_spec=${cni_spec//@cniCiliumPlugin/}
fi

# Add istio plug-in to spec if env var is not empty
if [[ -n "${ISTIO_CNI_CONFIG:-}" ]]; then
echo "Adding Istio plug-in to the CNI config."
log "Adding Istio plug-in to the CNI config."
cni_spec=${cni_spec//@cniIstioPlugin/, ${ISTIO_CNI_CONFIG}}
else
echo "Not using Istio plug-in."
log "Not using Istio plug-in."
cni_spec=${cni_spec//@cniIstioPlugin/}
fi

Expand Down Expand Up @@ -199,7 +201,7 @@ function fillSubnetsInCniSpecV2Template {

for subnet in $(jq -r '.spec.podCIDRs[]' <<<"$node") ; do
if is_ipv4_range "${subnet}" ; then
echo "IPv4 subnet detected in .spec.podCIDRs: '${subnet:-}'"
log "IPv4 subnet detected in .spec.podCIDRs: '${subnet:-}'"
if [ "${ENABLE_CALICO_NETWORK_POLICY}" == "true" ]; then
# calico uses special value `usePodCidr` instead of directly providing IP range
SUBNETS_REPLACEMENT+=('[{"subnet": "usePodCidr"}]')
Expand All @@ -209,14 +211,14 @@ function fillSubnetsInCniSpecV2Template {
ROUTES_REPLACEMENT+=('{"dst": "0.0.0.0/0"}')
fi
elif is_ipv6_range "${subnet}" ; then
echo "IPv6 subnet detected in .spec.podCIDRs: '${subnet:-}'"
log "IPv6 subnet detected in .spec.podCIDRs: '${subnet:-}'"
POPULATE_IP6TABLES="true"
echo "ip6tables will be populated because IPv6 podCIDR is configured (from .spec.podCIDRs)"
log "ip6tables will be populated because IPv6 podCIDR is configured (from .spec.podCIDRs)"
ipv6_subnet_configured="true"
SUBNETS_REPLACEMENT+=("$(jq -nc --arg subnet "${subnet}" '[{"subnet": $subnet}]')")
ROUTES_REPLACEMENT+=('{"dst": "::/0"}')
else
echo "[ERROR] Subnet detected in .spec.podCIDRs '${subnet}' is not a valid IP range"
log "[ERROR] Subnet detected in .spec.podCIDRs '${subnet}' is not a valid IP range"
exit 1
fi
done
Expand All @@ -227,10 +229,11 @@ function fillSubnetsInCniSpecV2Template {
# of /112 even when it is not specified in node's .spec.podCIDRs
if [ -n "${node_ipv6_addr:-}" ] && [ "${node_ipv6_addr}" != "null" ]; then
POPULATE_IP6TABLES="true"
echo "ip6tables will be populated because IPv6 podCIDR is configured (for directpath)"
log "ip6tables will be populated because IPv6 podCIDR is configured (for directpath)"
local subnet_from_node_ipv6_addr="${node_ipv6_addr}/112"
SUBNETS_REPLACEMENT+=("$(jq -nc --arg subnet "${subnet_from_node_ipv6_addr}" '[{"subnet": $subnet}]')")
ROUTES_REPLACEMENT+=("${CNI_SPEC_IPV6_ROUTE:-{\"dst\": \"::/0\"\}}")
local default_ipv6_route='{"dst": "::/0"}'
ROUTES_REPLACEMENT+=("${CNI_SPEC_IPV6_ROUTE:-${default_ipv6_route}}")
fi
fi

Expand All @@ -255,27 +258,28 @@ function fillSubnetsInCniSpecLegacyTemplate {
primary_subnet=$(jq -r '.spec.podCIDR' <<<"$node")

if is_ipv4_range "${primary_subnet:-}" ; then
echo "PodCIDR IPv4 detected: '${primary_subnet:-}'"
log "PodCIDR IPv4 detected: '${primary_subnet:-}'"
cni_spec=${cni_spec//@ipv4Subnet/[{\"subnet\": \"${primary_subnet:-}\"\}]}
elif is_ipv6_range "${primary_subnet:-}" ; then
echo "Primary IPv6 pod range detected '${primary_subnet:-}'. It will only work with new spec template."
log "Primary IPv6 pod range detected '${primary_subnet:-}'. It will only work with new spec template."
exit 1
else
echo "Response from $node_url"
echo "$node"
echo "Failed to fetch PodCIDR from K8s API server, primary_subnet=${primary_subnet:-}. Exiting (1)..."
log "Response from $node_url"
log "$node"
log "Failed to fetch PodCIDR from K8s API server, primary_subnet=${primary_subnet:-}. Exiting (1)..."
exit 1
fi

if [ -n "${node_ipv6_addr:-}" ] && [ "${node_ipv6_addr}" != "null" ]; then
echo "Found nic0 IPv6 address ${node_ipv6_addr:-}. Filling IPv6 subnet and route..."
log "Found nic0 IPv6 address ${node_ipv6_addr:-}. Filling IPv6 subnet and route..."
POPULATE_IP6TABLES="true"
echo "ip6tables will be populated because IPv6 podCIDR is configured (from node interface)"
log "ip6tables will be populated because IPv6 podCIDR is configured (from node interface)"

cni_spec=${cni_spec//@ipv6SubnetOptional/, [{\"subnet\": \"${node_ipv6_addr:-}/112\"\}]}
cni_spec=${cni_spec//@ipv6RouteOptional/, ${CNI_SPEC_IPV6_ROUTE:-{\"dst\": \"::/0\"\}}}
local default_ipv6_route='{"dst": "::/0"}'
cni_spec=${cni_spec//@ipv6RouteOptional/, ${CNI_SPEC_IPV6_ROUTE:-${default_ipv6_route}}}
else
echo "No IPv6 address found for nic0. Clearing IPv6 subnet and route..."
log "No IPv6 address found for nic0. Clearing IPv6 subnet and route..."
cni_spec=${cni_spec//@ipv6SubnetOptional/}
cni_spec=${cni_spec//@ipv6RouteOptional/}
fi
Expand All @@ -293,7 +297,7 @@ function fillSubnetsInCniSpec {


CLUSTER_STACK_TYPE=$(jq -r '.metadata.labels."cloud.google.com/gke-stack-type"' <<<"$response")
echo "Node's cluster stack type label: '${CLUSTER_STACK_TYPE:-}'"
log "Node's cluster stack type label: '${CLUSTER_STACK_TYPE:-}'"

node_ipv6_addr=''
if [ "$ENABLE_IPV6" == "true" ] || [ "${CLUSTER_STACK_TYPE:-}" == "IPV4_IPV6" ]; then
Expand All @@ -305,18 +309,18 @@ fillSubnetsInCniSpec "$response" "$node_ipv6_addr"
if [ "$POPULATE_IP6TABLES" == "true" ] ; then
# Ensure the IPv6 firewall rules are as expected.
# These rules mirror the IPv4 rules installed by kubernetes/cluster/gce/gci/configure-helper.sh
echo "Ensuring IPv6 firewall rules with ip6tables"
log "Ensuring IPv6 firewall rules with ip6tables"

if ip6tables -w -L INPUT | grep "Chain INPUT (policy DROP)" > /dev/null; then
echo "Add rules to accept all inbound TCP/UDP/ICMP/SCTP IPv6 packets"
log "Add rules to accept all inbound TCP/UDP/ICMP/SCTP IPv6 packets"
ip6tables -A INPUT -w -p tcp -j ACCEPT
ip6tables -A INPUT -w -p udp -j ACCEPT
ip6tables -A INPUT -w -p icmpv6 -j ACCEPT
ip6tables -A INPUT -w -p sctp -j ACCEPT
fi

if ip6tables -w -L FORWARD | grep "Chain FORWARD (policy DROP)" > /dev/null; then
echo "Add rules to accept all forwarded TCP/UDP/ICMP/SCTP IPv6 packets"
log "Add rules to accept all forwarded TCP/UDP/ICMP/SCTP IPv6 packets"
ip6tables -A FORWARD -w -p tcp -j ACCEPT
ip6tables -A FORWARD -w -p udp -j ACCEPT
ip6tables -A FORWARD -w -p icmpv6 -j ACCEPT
Expand All @@ -330,7 +334,7 @@ if [ "$POPULATE_IP6TABLES" == "true" ] ; then
ip6tables -I OUTPUT -m state --state NEW,ESTABLISHED,RELATED -j ACCEPT -w

if [ "${ENABLE_CALICO_NETWORK_POLICY}" == "true" ]; then
echo "Enabling IPv6 forwarding..."
log "Enabling IPv6 forwarding..."
# IPV6_FORWARDING_CONF override only to be used in tests.
echo 1 > "${IPV6_FORWARDING_CONF:-/proc/sys/net/ipv6/conf/all/forwarding}"
fi
Expand Down Expand Up @@ -360,31 +364,31 @@ for nic in cilium_wg0 "${default_nics[@]}"; do
# SYS_CLASS_NET override only to be used in tests.
mtu_file=${SYS_CLASS_NET:-/sys/class/net}/$nic/mtu
if [[ -f "$mtu_file" ]]; then
MTU=$(cat "$mtu_file")
MTU=$(<"$mtu_file")
MTU_SOURCE=$nic
break
fi
done

# Set mtu
cni_spec=${cni_spec//@mtu/$MTU}
echo "Set the default mtu to $MTU, inherited from $MTU_SOURCE"
log "Set the default mtu to $MTU, inherited from $MTU_SOURCE"

if [ "${ENABLE_CILIUM_PLUGIN}" == "true" ]; then
echo "Cilium plug-in is in use. Holding CNI configurations until Cilium is ready."
log "Cilium plug-in is in use. Holding CNI configurations until Cilium is ready."

# inotify calls back to the beginning of this script.
inotify /host/home/kubernetes/bin cilium-cni "$0" cni_ready cilium-cni
echo "Cilium plug-in binary is now confirmed as ready."
log "Cilium plug-in binary is now confirmed as ready."
fi

# Wait for istio plug-in if it is enabled
if [[ -n "${ISTIO_CNI_CONFIG:-}" ]]; then
echo "Istio plug-in is in use. Holding CNI configurations until Istio is ready."
log "Istio plug-in is in use. Holding CNI configurations until Istio is ready."

# inotify calls back to the beginning of this script.
inotify /host/home/kubernetes/bin istio-cni "$0" cni_ready istio-cni
echo "Istio plug-in binary is now confirmed as ready."
log "Istio plug-in binary is now confirmed as ready."
fi

# Atomically write to file.
Expand All @@ -402,7 +406,7 @@ function write_file {
rm -f -- "${temp_file}"
trap - EXIT

echo "File written to '${file}' with content (base64): $(base64 -w 0 -- "${file}")"
log "File written to '${file}' with content (base64): $(base64 -w 0 -- "${file}")"
}

# Output CNI spec (template).
Expand All @@ -422,48 +426,48 @@ cilium_health_check() {
# Cilium health check logic before watchdog and CNI STATUS API are introduced.
cilium_wait_or_ignore() {
# Runs cilium_health_check once, passing CILIUM_HEALTH_MAX_WAIT_TIME
# (default 600) as its argument, and reports the outcome.
# A failed health check is only logged, never propagated: both branches just
# print a message, so the caller continues either way.
# NOTE(review): the argument is presumably a maximum wait in seconds; the
# body of cilium_health_check is not visible here -- confirm.
# NOTE(review): each message is printed by both echo and log; this looks like
# the old and new side of a change shown together -- confirm which remains.
if cilium_health_check "${CILIUM_HEALTH_MAX_WAIT_TIME:-600}"; then
echo "Cilium healthz reported success."
log "Cilium healthz reported success."
else
echo "Cilium not yet ready. Continuing anyway."
log "Cilium not yet ready. Continuing anyway."
fi
}

write_and_success() {
# Logs the final CNI spec, writes it to ${output_file} via write_file, then
# hands control to `success`.
# Globals read: output_file, cni_spec.
# NOTE(review): from the visible part of `success`, it either exits 0 or
# sleeps forever depending on RUN_CNI_WATCHDOG, so callers should not expect
# this function to return -- confirm against the full definition.
# The echo variant compacts the spec with `jq -c`; the log variant avoids the
# extra process and flattens it by replacing newlines with spaces.
# NOTE(review): both variants are present, so the message is printed twice;
# this looks like the old and new side of a change shown together.
echo "Creating CNI spec at '${output_file}' with content: $(jq -c . <<<"${cni_spec}")"
log "Creating CNI spec at '${output_file}' with content: ${cni_spec//$'\n'/ }"
write_file "${output_file}" "${cni_spec}"
success
}

# Case 1: Cilium is not enabled. Write the CNI config immediately and finish.
# NOTE(review): write_and_success ends in `success`, which appears not to
# return, so execution presumably only reaches the next check when Cilium is
# enabled -- confirm against the full definition of `success`.
if [[ "${ENABLE_CILIUM_PLUGIN:-}" != "true" ]]; then
echo "Cilium CNI is not in use"
log "Cilium CNI is not in use"
write_and_success
fi

# Case 2: Cilium is enabled but the watchdog is not. Give Cilium a chance to
# become healthy (a failed wait is ignored), then write the config and finish.
if [[ "${RUN_CNI_WATCHDOG:-}" != "true" ]]; then
echo "Cilium CNI is in use but CNI watchdog is not enabled"
log "Cilium CNI is in use but CNI watchdog is not enabled"
cilium_wait_or_ignore
write_and_success
fi

# CNI watchdog: keeps the CNI config file at ${output_file} present while
# Cilium reports healthy and removes it while Cilium reports unhealthy.
# NOTE(review): every message in this section is printed by both echo and
# log; this looks like the old and new side of a change shown together.
echo "Running CNI watchdog to watch Cilium and manage CNI config at '${output_file}' with content: $(jq -c . <<<"${cni_spec}")"
log "Running CNI watchdog to watch Cilium and manage CNI config at '${output_file}' with content: ${cni_spec//$'\n'/ }"
# Tunables, each overridable through the environment. The two *_wait values
# are passed to `sleep` with an "s" suffix below, i.e. they are seconds; the
# retry value is handed to cilium_health_check and logged with an "s" suffix.
cilium_watchdog_success_wait=${CILIUM_WATCHDOG_SUCCESS_WAIT:-300}
cilium_watchdog_failure_retry=${CILIUM_WATCHDOG_FAILURE_RETRY:-60}
cilium_watchdog_fast_start_wait=${CILIUM_WATCHDOG_FAST_START_WAIT:-60}

# With fast-start namespaces configured, publish the config before the first
# health check and wait before starting to check.
if [[ -n "${CILIUM_FAST_START_NAMESPACES:-}" ]]; then
echo "Cilium has fast-start; writing CNI config upfront then wait for ${cilium_watchdog_fast_start_wait}s and start to check Cilium health."
log "Cilium has fast-start; writing CNI config upfront then wait for ${cilium_watchdog_fast_start_wait}s and start to check Cilium health."
write_file "${output_file}" "${cni_spec}"
sleep "${cilium_watchdog_fast_start_wait}"s
fi

# Runs forever; there is no break or exit inside the loop body.
while true; do
echo "Checking Cilium health allowing retries for up to ${cilium_watchdog_failure_retry}s."
log "Checking Cilium health allowing retries for up to ${cilium_watchdog_failure_retry}s."
if cilium_health_check "${cilium_watchdog_failure_retry}"; then
echo "Cilium healthz reported success; writing CNI config if not already there then wait for ${cilium_watchdog_success_wait}s."
log "Cilium healthz reported success; writing CNI config if not already there then wait for ${cilium_watchdog_success_wait}s."
# Only (re)write when the file is missing. When it already exists the test
# fails and write_file is skipped; a failing non-final command in an && list
# does not trip `set -e`.
[[ ! -f "${output_file}" ]] && write_file "${output_file}" "${cni_spec}"
sleep "${cilium_watchdog_success_wait}"s
else
echo "Cilium does not appear healthy; removing CNI config if it exists."
log "Cilium does not appear healthy; removing CNI config if it exists."
# No sleep on this path: the loop goes straight back to the health check.
# NOTE(review): presumably cilium_health_check itself takes up the retry
# window before failing -- confirm against its definition.
rm -f -- "${output_file}"
fi
done
Expand Down

0 comments on commit 6c9f963

Please sign in to comment.