Skip to content

Commit

Permalink
fix: replicaset health
Browse files (browse the repository at this point in the history)
  • Loading branch information
moshloop committed Nov 18, 2024
1 parent 44a61d9 commit 7b93963
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 61 deletions.
77 changes: 18 additions & 59 deletions pkg/health/health_replicaset.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,6 @@ package health

import (
"fmt"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand All @@ -18,79 +17,39 @@ func getReplicaSetHealth(obj *unstructured.Unstructured) (*HealthStatus, error)
if err != nil {
return nil, err
}
return getAppsv1ReplicaSetHealth(&replicaSet)
return getAppsv1ReplicaSetHealth(&replicaSet, obj)
default:
return nil, fmt.Errorf("unsupported ReplicaSet GVK: %s", gvk)
}
}

func getAppsv1ReplicaSetHealth(rs *appsv1.ReplicaSet) (*HealthStatus, error) {
func getAppsv1ReplicaSetHealth(rs *appsv1.ReplicaSet, obj *unstructured.Unstructured) (*HealthStatus, error) {
replicas := int32(0)
if rs.Spec.Replicas != nil {
replicas = *rs.Spec.Replicas
}
startDeadline := GetStartDeadline(rs.Spec.Template.Spec.Containers...)
age := time.Since(rs.CreationTimestamp.Time).Truncate(time.Minute).Abs()

health := HealthHealthy
if rs.Status.ReadyReplicas == 0 {
if rs.Status.Replicas > 0 && age < startDeadline {
health = HealthUnknown
} else {
health = HealthUnhealthy
}
} else if rs.Status.ReadyReplicas < replicas {
health = HealthWarning
} else if rs.Status.ReadyReplicas >= replicas {
health = HealthHealthy
}
hr := getReplicaHealth(ReplicaStatus{
Object: obj,
Containers: rs.Spec.Template.Spec.Containers,
Desired: int(replicas),
Replicas: int(rs.Status.Replicas),
Ready: int(rs.Status.ReadyReplicas),
Updated: int(rs.Status.FullyLabeledReplicas),
})

if replicas == 0 && rs.Status.Replicas == 0 {
return &HealthStatus{
Ready: true,
Status: HealthStatusScaledToZero,
Health: health,
}, nil
}

if rs.Generation == rs.Status.ObservedGeneration &&
rs.Status.ReadyReplicas == *rs.Spec.Replicas {
return &HealthStatus{
Health: health,
Status: HealthStatusRunning,
Ready: true,
}, nil
if rs.Generation != rs.Status.ObservedGeneration {
hr.Status = HealthStatusUpdating
hr.Ready = false
}

failCondition := getAppsv1ReplicaSetCondition(rs.Status, appsv1.ReplicaSetReplicaFailure)
if failCondition != nil && failCondition.Status == corev1.ConditionTrue {
return &HealthStatus{
Health: health,
Status: HealthStatusError,
Message: failCondition.Message,
}, nil
if hr.Health != HealthUnhealthy && failCondition != nil && failCondition.Status == corev1.ConditionTrue {
hr.Ready = true
hr.Health = HealthUnhealthy
hr.Message = failCondition.Message
}

if rs.Status.ReadyReplicas < *rs.Spec.Replicas {
return &HealthStatus{
Health: health,
Status: HealthStatusScalingUp,
Message: fmt.Sprintf("%d of %d pods ready", rs.Status.ReadyReplicas, *rs.Spec.Replicas),
}, nil
}

if rs.Status.ReadyReplicas > *rs.Spec.Replicas {
return &HealthStatus{
Health: health,
Status: HealthStatusScalingDown,
Message: fmt.Sprintf("%d pods terminating", rs.Status.ReadyReplicas-*rs.Spec.Replicas),
}, nil
}

return &HealthStatus{
Status: HealthStatusUnknown,
Health: health,
}, nil
return hr, nil
}

func getAppsv1ReplicaSetCondition(
Expand Down
4 changes: 2 additions & 2 deletions pkg/health/health_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -545,11 +545,11 @@ func TestHPA(t *testing.T) {
func TestReplicaSet(t *testing.T) {
assertAppHealthWithOverwrite(t, "./testdata/replicaset-ittools.yml", map[string]string{
"2024-08-03T06:06:18Z": time.Now().Add(-time.Minute * 2).UTC().Format("2006-01-02T15:04:05Z"),
}, health.HealthStatusRunning, health.HealthHealthy, true)
}, health.HealthStatusRunning, health.HealthHealthy, false)

assertAppHealthWithOverwrite(t, "./testdata/replicaset-unhealthy-pods.yaml", map[string]string{
"2024-10-21T11:20:19Z": time.Now().Add(-time.Minute * 2).UTC().Format("2006-01-02T15:04:05Z"),
}, health.HealthStatusScalingUp, health.HealthUnknown, false)
}, health.HealthStatusStarting, health.HealthUnknown, false)
}

func TestPod(t *testing.T) {
Expand Down
89 changes: 89 additions & 0 deletions pkg/health/testdata/Kubernetes/ReplicaSet/unknown.yaml
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  uid: 5164138b-dc4a-4192-9107-1e6341eafc29
  name: incident-manager-ui-66cfd695c
  labels:
    pod-template-hash: 66cfd695c
    app.kubernetes.io/name: incident-manager-ui
    app.kubernetes.io/instance: mission-control
  namespace: mission-control
  annotations:
    expected-status: Scaled to Zero
    expected-ready: "true"
    meta.helm.sh/release-name: mission-control
    meta.helm.sh/release-namespace: mission-control
    deployment.kubernetes.io/revision: "109"
    deployment.kubernetes.io/max-replicas: "2"
    deployment.kubernetes.io/desired-replicas: "1"
  ownerReferences:
    - uid: f40af5c4-d2d3-4478-8a97-a6125083dfcf
      kind: Deployment
      name: incident-manager-ui
      apiVersion: apps/v1
      controller: true
      blockOwnerDeletion: true
  creationTimestamp: 2024-11-11T19:03:49Z
spec:
  replicas: 0
  selector:
    matchLabels:
      pod-template-hash: 66cfd695c
      app.kubernetes.io/name: incident-manager-ui
      app.kubernetes.io/instance: mission-control
  template:
    spec:
      dnsPolicy: ClusterFirst
      containers:
        - env:
            - name: HOSTNAME
              value: 0.0.0.0
            - name: ORY_KRATOS_URL
              value: https://incident-commander.demo.aws.flanksource.com/api/.ory
            - name: BACKEND_URL
              value: http://mission-control:8080
          name: flanksource-ui
          image: public.ecr.aws/flanksource/incident-manager-ui:v1.0.822
          ports:
            - name: http
              protocol: TCP
              containerPort: 3000
          resources:
            limits:
              memory: 2Gi
            requests:
              cpu: 200m
              memory: 200Mi
          livenessProbe:
            httpGet:
              path: /api/_health
              port: http
              scheme: HTTP
            periodSeconds: 10
            timeoutSeconds: 1
            failureThreshold: 3
            successThreshold: 1
          readinessProbe:
            httpGet:
              path: /api/_health
              port: http
              scheme: HTTP
            periodSeconds: 10
            timeoutSeconds: 1
            failureThreshold: 3
            successThreshold: 1
          imagePullPolicy: IfNotPresent
          securityContext: {}
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
    metadata:
      labels:
        pod-template-hash: 66cfd695c
        app.kubernetes.io/name: incident-manager-ui
        app.kubernetes.io/instance: mission-control
status:
  replicas: 0

0 comments on commit 7b93963

Please sign in to comment.