Skip to content

Commit

Permalink
chore: health fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
moshloop committed Nov 19, 2024
1 parent 0c7cba6 commit 4a9429b
Show file tree
Hide file tree
Showing 13 changed files with 473 additions and 173 deletions.
69 changes: 47 additions & 22 deletions pkg/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,21 @@ func IsWorse(current, new HealthStatusCode) bool {
return newIndex > currentIndex
}

func GetHealthByConfigType(configType string, obj map[string]any, states ...string) HealthStatus {
switch configType {
case "AWS::ECS::Task":
return GetECSTaskHealth(obj)
}
// get looks up the string stored under the nested path keys in obj and returns
// it with leading and trailing whitespace removed.
//
// The "found" flag and the error reported by unstructured.NestedString are
// deliberately discarded, so callers cannot tell a missing or non-string field
// apart from an empty one.
func get(obj map[string]any, keys ...string) string {
	value, _, _ := unstructured.NestedString(obj, keys...)
	trimmed := strings.TrimSpace(value)
	return trimmed
}

// isArgoHealth reports whether s is exactly one of the status codes
// "Suspended", "Degraded" or "Progressing". The comparison is case-sensitive.
func isArgoHealth(s HealthStatusCode) bool {
	switch s {
	case "Suspended", "Degraded", "Progressing":
		return true
	default:
		return false
	}
}

func GetHealthByConfigType(configType string, obj map[string]any, states ...string) HealthStatus {
configClass := strings.Split(configType, "::")[0]

switch strings.ToLower(configClass) {
case "aws":
return getAWSHealthByConfigType(configType, obj, states...)
case "mongo":
return GetMongoHealth(obj)
case "kubernetes", "crossplane", "missioncontrol", "flux", "argo":
Expand Down Expand Up @@ -172,37 +178,56 @@ func GetResourceHealth(
Status: HealthStatusUnknown,
Message: err.Error(),
}
} else {
return health, nil
}
}

if healthOverride != nil {
health, err := healthOverride.GetResourceHealth(obj)
if health == nil && healthOverride != nil {
health, err = healthOverride.GetResourceHealth(obj)
if err != nil {
health = &HealthStatus{
return &HealthStatus{
Status: HealthStatusUnknown,
Message: err.Error(),
}
return health, err
}
if health != nil {
return health, nil
}, err
}
}

if obj.GetDeletionTimestamp() != nil {
return &HealthStatus{
Status: HealthStatusTerminating,
}, nil
if health == nil ||
health.Status == "" ||
isArgoHealth(health.Status) {
// try and get a better status from conditions
defaultHealth, err := GetDefaultHealth(obj)
if err != nil {
return &HealthStatus{
Status: "HealthParseError",
Message: lo.Elipse(err.Error(), 500),
}, nil
}
if health == nil {
health = defaultHealth
}
if health.Status == "" {
health.Status = defaultHealth.Status
}

if defaultHealth.Status != "" && isArgoHealth(health.Status) && !isArgoHealth(defaultHealth.Status) {
health.Status = defaultHealth.Status
}
if health.Message == "" {
health.Message = defaultHealth.Message
}
}

if health == nil {
return &HealthStatus{
health = &HealthStatus{
Status: HealthStatusUnknown,
Ready: true,
}, nil
}
}
if obj.GetDeletionTimestamp() != nil {
health.Status = HealthStatusTerminating
health.Ready = false
}

return health, err
}

Expand Down Expand Up @@ -274,5 +299,5 @@ func GetHealthCheckFunc(gvk schema.GroupVersionKind) func(obj *unstructured.Unst
return getHPAHealth
}
}
return GetDefaultHealth
return nil
}
45 changes: 45 additions & 0 deletions pkg/health/health_aws.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,50 @@
package health

import (
"fmt"
"strings"
)

// GetAWSResourceHealth converts an AWS status name into a HealthStatus by
// delegating to GetHealthFromStatusName. The first argument is accepted for
// signature compatibility but is not used.
func GetAWSResourceHealth(_, status string) (health HealthStatus) {
	health = GetHealthFromStatusName(status)
	return health
}

// getAWSHealthByConfigType derives a HealthStatus for an AWS config item.
//
// Known config types read their status (and, where one is listed, a reason)
// from a type-specific field of obj. For any other type the first entry of
// states is used when provided; otherwise obj is searched for a status-like
// top-level key (see awsStatusKey). When nothing matches, a HealthStatus whose
// Health is HealthUnknown is returned.
func getAWSHealthByConfigType(configType string, obj map[string]any, states ...string) HealthStatus {
	// NOTE(review): matching here is exact and case-sensitive — confirm that
	// config types always arrive in this casing (e.g. "Cloudformation").
	switch configType {
	case "AWS::ECS::Task":
		return GetECSTaskHealth(obj)
	case "AWS::Cloudformation::Stack":
		return GetHealthFromStatusName(get(obj, "StackStatus"), get(obj, "StackStatusReason"))
	case "AWS::EC2::Instance":
		return GetHealthFromStatusName(get(obj, "State"))
	case "AWS::RDS::DBInstance":
		return GetHealthFromStatusName(get(obj, "DBInstanceStatus"))
	case "AWS::ElasticLoadBalancing::LoadBalancer":
		return GetHealthFromStatusName(get(obj, "State", "Code"))
	case "AWS::AutoScaling::AutoScalingGroup":
		return GetHealthFromStatusName(get(obj, "Status"))
	case "AWS::Lambda::Function":
		return GetHealthFromStatusName(get(obj, "State"), get(obj, "StateReasonCode"))
	case "AWS::DynamoDB::Table":
		return GetHealthFromStatusName(get(obj, "TableStatus"))
	case "AWS::ElastiCache::CacheCluster":
		return GetHealthFromStatusName(get(obj, "CacheClusterStatus"))
	}

	// An explicitly supplied state wins over anything guessed from obj.
	if len(states) > 0 {
		return GetHealthFromStatusName(states[0])
	}

	if key, ok := awsStatusKey(obj); ok {
		return GetHealthFromStatusName(awsStatusValue(obj[key]))
	}

	return HealthStatus{
		Health: HealthUnknown,
	}
}

// awsStatusKey picks the top-level key of obj that most likely holds a status.
//
// Go map iteration order is unspecified, so returning the first match found
// while ranging would make the result vary between runs when several keys
// qualify. Candidates are therefore ranked: a key equal to "status" beats one
// equal to "state", which beats any key ending in "status" (all compared
// case-insensitively); ties are broken by the smaller key in byte order.
// ok is false when no key qualifies.
func awsStatusKey(obj map[string]any) (key string, ok bool) {
	bestRank := 0
	for k := range obj {
		rank := awsStatusKeyRank(strings.ToLower(k))
		if rank == 0 {
			continue
		}
		if bestRank == 0 || rank < bestRank || (rank == bestRank && k < key) {
			key, bestRank = k, rank
		}
	}
	return key, bestRank != 0
}

// awsStatusKeyRank returns the preference rank of an already lower-cased key:
// 1 for "status", 2 for "state", 3 for any other key ending in "status", and 0
// when the key is not status-like.
func awsStatusKeyRank(lowerKey string) int {
	switch {
	case lowerKey == "status":
		return 1
	case lowerKey == "state":
		return 2
	case strings.HasSuffix(lowerKey, "status"):
		return 3
	}
	return 0
}

// awsStatusValue renders a status field as text. Strings and byte slices are
// used verbatim; any other value goes through fmt.Sprint so that, for example,
// the number 3 becomes "3" rather than the "%!s(int=3)" that the %s verb
// would produce.
func awsStatusValue(v any) string {
	switch s := v.(type) {
	case string:
		return s
	case []byte:
		return string(s)
	default:
		return fmt.Sprint(v)
	}
}
27 changes: 14 additions & 13 deletions pkg/health/health_aws_ecs.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ func GetECSTaskHealth(obj map[string]any) (health HealthStatus) {
}

if v, ok := obj["HealthStatus"].(string); ok {
hr.Health = Health(lo.CamelCase(v))
hr.Status = HealthStatusCode(HumanCase(v))
}

switch hr.Status {
switch strings.ToUpper(string(hr.Status)) {
case "RUNNING":
hr.Health = HealthHealthy
hr.Ready = true
case "STOPPED", "DELETED":
case "STOPPED", "DELETED", "UNKNOWN":
hr.Ready = true
hr.Health = HealthUnknown
}
Expand All @@ -31,15 +31,16 @@ func GetECSTaskHealth(obj map[string]any) (health HealthStatus) {
if stopCode != "" {
hr.Status = HealthStatusCode(stopCode)
}
switch stopCode {
case "TaskFailedToStart":

switch strings.ToUpper(stopCode) {
case "TASKFAILEDTOSTART":
hr.Health = HealthUnhealthy
case "EssentialContainerExited":
case "ESSENTIALCONTAINEREXITED":
hr.Status = HealthStatusCrashed
hr.Health = HealthUnhealthy
case "UserInitiated":
case "USERINITIATED":
hr.Status = HealthStatusStopped
case "ServiceSchedulerInitiated":
case "SERVICESCHEDULERINITIATED":
hr.Status = HealthStatusTerminating
}

Expand All @@ -52,15 +53,15 @@ func GetECSTaskHealth(obj map[string]any) (health HealthStatus) {
hr.Message = strings.TrimSpace(reason[idx+1:])
}

switch hr.Status {
case "ContainerRuntimeError", "ContainerRuntimeTimeoutError", "OutOfMemoryError":
switch strings.ToUpper(string(hr.Status)) {
case "CONTAINERRUNTIMEERROR", "CONTAINERRUNTIMETIMEOUTERROR", "OUTOFMEMORYERROR":
hr.Health = HealthUnhealthy
case "InternalError", "CannotCreateVolumeError", "ResourceNotFoundException", "CannotStartContainerError":
case "INTERNALERROR", "CANNOTCREATEVOLUMEERROR", "RESOURCENOTFOUNDERROR", "CANNOTSTARTCONTAINERERROR":
hr.Health = HealthUnhealthy
hr.Ready = true
case "SpotInterruptionError", "CannotStopContainerError", "CannotInspectContainerError":
case "SPOTINTERRUPTIONERROR", "CANNOTSTOPCONTAINERERROR", "CANNOTINSPECTCONTAINERERROR":
hr.Health = HealthWarning
case "TaskFailedToStart", "ResourceInitializationError", "CannotPullContainer":
case "TASKFAILEDTOSTART", "RESOURCEINITIALIZATIONERROR", "CANNOTPULLCONTAINER":
hr.Health = HealthUnhealthy
default:
hr.Health = HealthUnhealthy
Expand Down
Loading

0 comments on commit 4a9429b

Please sign in to comment.