Skip to content

Commit

Permalink
feat: container readiness probe waiting
Browse files Browse the repository at this point in the history
  • Loading branch information
adityathebe committed Jul 29, 2024
1 parent 6856991 commit 5e41c1e
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 5 deletions.
31 changes: 27 additions & 4 deletions pkg/health/health_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strings"
"time"

"github.com/samber/lo"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -201,10 +202,16 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
}

// if it's not ready, check to see if any container terminated, if so, it's degraded
allContainersReady := true
var nonReadyContainers []ContainerRecord
for _, ctrStatus := range pod.Status.ContainerStatuses {
if !ctrStatus.Ready {
allContainersReady = false
spec := lo.Filter(pod.Spec.Containers, func(i corev1.Container, _ int) bool {
return i.Name == ctrStatus.Name
})
nonReadyContainers = append(nonReadyContainers, ContainerRecord{
Status: ctrStatus,
Spec: spec[0],
})
}

if ctrStatus.LastTerminationState.Terminated != nil {
Expand All @@ -218,19 +225,30 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
}

// Pod isn't ready but all containers are
if allContainersReady {
if len(nonReadyContainers) == 0 {
return &HealthStatus{
Health: HealthWarning,
Status: HealthStatusRunning,
Message: pod.Status.Message,
}, nil
}

var containersWaitingForReadinessProbe []string
for _, c := range nonReadyContainers {
if c.Spec.ReadinessProbe == nil || c.Spec.ReadinessProbe.InitialDelaySeconds == 0 {
continue
}

if time.Since(c.Status.State.Running.StartedAt.Time) <= time.Duration(c.Spec.ReadinessProbe.InitialDelaySeconds)*time.Second {
containersWaitingForReadinessProbe = append(containersWaitingForReadinessProbe, c.Spec.Name)
}
}

// otherwise we are progressing towards a ready state
return &HealthStatus{
Health: HealthUnknown,
Status: HealthStatusStarting,
Message: pod.Status.Message,
Message: fmt.Sprintf("Container %s is waiting for readiness probe", strings.Join(containersWaitingForReadinessProbe, ",")),
}, nil

case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
Expand All @@ -254,3 +272,8 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
Message: pod.Status.Message,
}, nil
}

// ContainerRecord pairs a container's spec with its reported status so the
// two can be inspected together.
type ContainerRecord struct {
// Spec is the container definition taken from the pod's spec.
Spec corev1.Container
// Status is the container's entry from the pod's container statuses.
Status corev1.ContainerStatus
}
15 changes: 14 additions & 1 deletion pkg/health/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ func assertAppHealth(t *testing.T, yamlPath string, expectedStatus health.Health
assert.Equal(t, expectedStatus, health.Status)
}

// assertAppHealthWithOverwriteMsg obtains the health of the resource at
// yamlPath through getHealthStatus (overwrites is passed through unchanged) and
// asserts that the resulting health, readiness, status and message all equal
// the expected values.
func assertAppHealthWithOverwriteMsg(t *testing.T, yamlPath string, overwrites map[string]string, expectedStatus health.HealthStatusCode, expectedHealth health.Health, expectedReady bool, expectedMsg string) {
	// Attribute assertion failures to the calling test line, not this helper.
	t.Helper()

	// Named got rather than health so the imported health package is not
	// shadowed inside the function body.
	got := getHealthStatus(yamlPath, t, overwrites)
	if !assert.NotNil(t, got) {
		// The field reads below would panic on a nil result; the failure has
		// already been recorded, so stop here.
		return
	}

	assert.Equal(t, expectedHealth, got.Health)
	assert.Equal(t, expectedReady, got.Ready)
	assert.Equal(t, expectedStatus, got.Status)
	assert.Equal(t, expectedMsg, got.Message)
}

func assertAppHealthWithOverwrite(t *testing.T, yamlPath string, overwrites map[string]string, expectedStatus health.HealthStatusCode, expectedHealth health.Health, expectedReady bool) {
health := getHealthStatus(yamlPath, t, overwrites)
assert.NotNil(t, health)
Expand Down Expand Up @@ -143,14 +152,18 @@ func TestHPA(t *testing.T) {
}

func TestPod(t *testing.T) {
assertAppHealthWithOverwriteMsg(t, "./testdata/pod-not-ready-container-not-ready.yaml", map[string]string{
"2024-07-29T06:32:56Z": time.Now().Add(time.Minute * 10).Format(time.RFC3339),
}, health.HealthStatusStarting, health.HealthUnknown, false, "Container nginx is waiting for readiness probe")

// Pod not ready
assertAppHealth(t, "./testdata/pod-not-ready-but-container-ready.yaml", health.HealthStatusRunning, health.HealthWarning, false)

// Restart Loop
assertAppHealth(t, "./testdata/pod-ready-container-terminated.yaml", health.HealthStatusRunning, health.HealthWarning, true)
assertAppHealthWithOverwrite(t, "./testdata/pod-ready-container-terminated.yaml", map[string]string{
"2024-07-18T12:03:16Z": "2024-07-18T12:05:16Z",
}, health.HealthStatusRunning, health.HealthHealthy, true)
}, health.HealthStatusRunning, health.HealthWarning, true)

// Less than 30 minutes
assertAppHealthWithOverwrite(t, "./testdata/pod-high-restart-count.yaml", map[string]string{
Expand Down
123 changes: 123 additions & 0 deletions pkg/health/testdata/pod-not-ready-container-not-ready.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
apiVersion: v1
kind: Pod
metadata:
creationTimestamp: '2024-07-29T06:32:55Z'
generateName: nginx-deployment-559c4fc45b-
labels:
app: nginx
pod-template-hash: 559c4fc45b
name: nginx-deployment-559c4fc45b-xdlmh
namespace: default
ownerReferences:
- apiVersion: apps/v1
blockOwnerDeletion: true
controller: true
kind: ReplicaSet
name: nginx-deployment-559c4fc45b
uid: 28b8d6dd-1ffe-4ede-8e24-0bafa3bb71e3
resourceVersion: '68571321'
uid: 9d304944-d50a-4b8a-971a-7fcad1a4ca9f
spec:
containers:
- image: nginx:alpine
imagePullPolicy: IfNotPresent
name: nginx
ports:
- containerPort: 80
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
path: /
port: 80
scheme: HTTP
initialDelaySeconds: 300
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
cpu: 100m
memory: 128Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: kube-api-access-ssdbh
readOnly: true
dnsPolicy: ClusterFirst
enableServiceLinks: true
nodeName: saka
preemptionPolicy: PreemptLowerPriority
priority: 0
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccount: default
serviceAccountName: default
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 300
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 300
volumes:
- name: kube-api-access-ssdbh
projected:
defaultMode: 420
sources:
- serviceAccountToken:
expirationSeconds: 3607
path: token
- configMap:
items:
- key: ca.crt
path: ca.crt
name: kube-root-ca.crt
- downwardAPI:
items:
- fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
path: namespace
status:
conditions:
- lastTransitionTime: '2024-07-29T06:32:55Z'
status: 'True'
type: Initialized
- lastTransitionTime: '2024-07-29T06:32:55Z'
message: 'containers with unready status: [nginx]'
reason: ContainersNotReady
status: 'False'
type: Ready
- lastTransitionTime: '2024-07-29T06:32:55Z'
message: 'containers with unready status: [nginx]'
reason: ContainersNotReady
status: 'False'
type: ContainersReady
- lastTransitionTime: '2024-07-29T06:32:55Z'
status: 'True'
type: PodScheduled
containerStatuses:
- containerID: containerd://01d5d0ded389b8dffa95f85fc8cb9a8a1bf916beae74a75dd7a62bc10dabef01
image: docker.io/library/nginx:alpine
imageID: docker.io/library/nginx@sha256:208b70eefac13ee9be00e486f79c695b15cef861c680527171a27d253d834be9
lastState: {}
name: nginx
ready: false
restartCount: 0
started: true
state:
running:
startedAt: '2024-07-29T06:32:56Z'
hostIP: 10.99.99.8
phase: Running
podIP: 10.42.2.27
podIPs:
- ip: 10.42.2.27
qosClass: BestEffort
startTime: '2024-07-29T06:32:55Z'

0 comments on commit 5e41c1e

Please sign in to comment.