Skip to content

Commit

Permalink
add new addonHealthCheck func to check all fields and support wildcard
Browse files Browse the repository at this point in the history
Signed-off-by: Zhiwei Yin <[email protected]>
  • Loading branch information
zhiweiyin318 committed Nov 23, 2024
1 parent d787e98 commit a8d8c1b
Show file tree
Hide file tree
Showing 5 changed files with 547 additions and 37 deletions.
126 changes: 91 additions & 35 deletions pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package agentdeploy
import (
"context"
"fmt"
"regexp"
"strings"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -167,17 +168,38 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
manifestConditions = append(manifestConditions, work.Status.ResourceStatus.Manifests...)
}

probeFields, healthChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
probeFields, healthChecker, healthAllChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
if err != nil {
// should not happen, return
return err
}

var resultFields []agent.ResultField

for _, field := range probeFields {
result := findResultByIdentifier(field.ResourceIdentifier, manifestConditions)
results := findResultsByIdentifier(field.ResourceIdentifier, manifestConditions)

// healthChecker will be ignored if healthAllChecker is set
if healthAllChecker != nil {
if len(results) != 0 {
resultFields = append(resultFields, results...)
}
continue
}

if healthChecker == nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("health checker function is not set %v", err),
})
return nil
}

// if no results are returned. it is possible that work agent has not returned the feedback value.
// mark condition to unknown
if result == nil {
if len(results) == 0 {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionUnknown,
Expand All @@ -189,16 +211,29 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
return nil
}

err := healthChecker(field.ResourceIdentifier, *result)
if err != nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
})
return nil
for _, result := range results {
err := healthChecker(result.ResourceIdentifier, result.FeedbackResult)
if err != nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
})
return nil
}
}

}

if healthAllChecker != nil && healthAllChecker(resultFields) != nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
})
return nil
}

meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Expand All @@ -214,21 +249,23 @@ func (s *healthCheckSyncer) analyzeWorkProber(
agentAddon agent.AgentAddon,
cluster *clusterv1.ManagedCluster,
addon *addonapiv1alpha1.ManagedClusterAddOn,
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, error) {
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, agent.AddonHealthCheckAllFunc, error) {

switch agentAddon.GetAgentAddonOptions().HealthProber.Type {
case agent.HealthProberTypeWork:
workProber := agentAddon.GetAgentAddonOptions().HealthProber.WorkProber
if workProber != nil {
return workProber.ProbeFields, workProber.HealthCheck, nil
return workProber.ProbeFields, workProber.HealthCheck, workProber.HealthCheckAll, nil
}
return nil, nil, fmt.Errorf("work prober is not configured")
return nil, nil, nil, fmt.Errorf("work prober is not configured")
case agent.HealthProberTypeDeploymentAvailability:
return s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
probeFields, heathChecker, err := s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
return probeFields, heathChecker, nil, err
case agent.HealthProberTypeWorkloadAvailability:
return s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
probeFields, heathChecker, err := s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
return probeFields, heathChecker, nil, err
default:
return nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
return nil, nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
}
}

Expand Down Expand Up @@ -294,27 +331,46 @@ func (s *healthCheckSyncer) analyzeWorkloadsWorkProber(
return probeFields, utils.WorkloadAvailabilityHealthCheck, nil
}

func findResultByIdentifier(identifier workapiv1.ResourceIdentifier, manifestConditions []workapiv1.ManifestCondition) *workapiv1.StatusFeedbackResult {
func findResultsByIdentifier(identifier workapiv1.ResourceIdentifier,
manifestConditions []workapiv1.ManifestCondition) []agent.ResultField {
var results []agent.ResultField
for _, status := range manifestConditions {
if identifier.Group != status.ResourceMeta.Group {
continue
}
if identifier.Resource != status.ResourceMeta.Resource {
continue
}
if identifier.Name != status.ResourceMeta.Name {
continue
}
if identifier.Namespace != status.ResourceMeta.Namespace {
continue
if resourceMatch(status.ResourceMeta, identifier) && len(status.StatusFeedbacks.Values) != 0 {
results = append(results, agent.ResultField{
ResourceIdentifier: workapiv1.ResourceIdentifier{
Group: status.ResourceMeta.Group,
Resource: status.ResourceMeta.Resource,
Name: status.ResourceMeta.Name,
Namespace: status.ResourceMeta.Namespace,
},
FeedbackResult: status.StatusFeedbacks,
})
}
}

if len(status.StatusFeedbacks.Values) == 0 {
return nil
}
return results
}

return &status.StatusFeedbacks
// wildcardMatch reports whether resource matches target.
//
// target may contain any number of '*' wildcards; each one stands for any run
// of characters (possibly empty, newlines excluded). Every other character in
// target, including regexp metacharacters such as '.' or '+', is matched
// literally. The whole of resource must match, not just a substring.
func wildcardMatch(resource, target string) bool {
	if resource == target || target == "*" {
		return true
	}

	// Without a wildcard the only possible match is exact equality, which was
	// already ruled out above, so avoid building and compiling a regexp.
	if !strings.Contains(target, "*") {
		return false
	}

	// Quote the target so every character is literal, then turn each quoted
	// '*' back into the regexp wildcard ".*" and anchor both ends.
	pattern := "^" + regexp.QuoteMeta(target) + "$"
	pattern = strings.ReplaceAll(pattern, "\\*", ".*")

	re, err := regexp.Compile(pattern)
	if err != nil {
		return false
	}

	return re.MatchString(resource)
}

// resourceMatch reports whether the manifest described by resourceMeta is
// selected by the identifier resource. Group and Resource must be equal,
// while Namespace and Name are compared with wildcardMatch using the
// identifier's values as the (possibly wildcarded) target.
func resourceMatch(resourceMeta workapiv1.ManifestResourceMeta, resource workapiv1.ResourceIdentifier) bool {
	if resourceMeta.Group != resource.Group {
		return false
	}
	if resourceMeta.Resource != resource.Resource {
		return false
	}
	if !wildcardMatch(resourceMeta.Namespace, resource.Namespace) {
		return false
	}
	return wildcardMatch(resourceMeta.Name, resource.Name)
}
190 changes: 189 additions & 1 deletion pkg/addonmanager/controllers/agentdeploy/healthcheck_sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package agentdeploy

import (
"context"
"fmt"
"testing"
"time"

Expand Down Expand Up @@ -362,7 +363,163 @@ func TestHealthCheckReconcile(t *testing.T) {
Message: "test add-on is available.",
},
},

{
name: "Health check mode is work and WorkProber check pass with addonHealthCheckAllFunc",
testAddon: &healthCheckTestAgent{name: "test",
health: newDeploymentsCheckAllProber(types.NamespacedName{Name: "test-deployment0", Namespace: "default"},
types.NamespacedName{Name: "test-deployment1", Namespace: "default"}),
},
addon: addontesting.NewAddonWithConditions("test", "cluster1", manifestAppliedCondition),
existingWork: []runtime.Object{
&v1.ManifestWork{
ObjectMeta: metav1.ObjectMeta{
Name: "addon-test-deploy-01",
Namespace: "cluster1",
Labels: map[string]string{
"open-cluster-management.io/addon-name": "test",
},
},
Spec: v1.ManifestWorkSpec{},
Status: v1.ManifestWorkStatus{
ResourceStatus: v1.ManifestResourceStatus{
Manifests: []v1.ManifestCondition{
{
ResourceMeta: v1.ManifestResourceMeta{
Ordinal: 0,
Group: "apps",
Version: "",
Kind: "",
Resource: "deployments",
Name: "test-deployment0",
Namespace: "default",
},
StatusFeedbacks: v1.StatusFeedbackResult{
Values: []v1.FeedbackValue{
{
Name: "Replicas",
Value: v1.FieldValue{
Integer: boolPtr(1),
},
},
{
Name: "ReadyReplicas",
Value: v1.FieldValue{
Integer: boolPtr(2),
},
},
},
},
},
{
ResourceMeta: v1.ManifestResourceMeta{
Ordinal: 0,
Group: "apps",
Version: "",
Kind: "",
Resource: "deployments",
Name: "test-deployment1",
Namespace: "default",
},
StatusFeedbacks: v1.StatusFeedbackResult{},
},
},
},
Conditions: []metav1.Condition{
{
Type: v1.WorkAvailable,
Status: metav1.ConditionTrue,
},
},
},
},
},
expectedErr: nil,
expectedHealthCheckMode: addonapiv1alpha1.HealthCheckModeCustomized,
expectAvailableCondition: metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionTrue,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeAvailable,
Message: "test add-on is available.",
},
},
{
name: "Health check mode is work and WorkProber check pass with addonHealthCheckAllFunc and wildcard",
testAddon: &healthCheckTestAgent{name: "test",
health: newDeploymentsCheckAllProber(types.NamespacedName{Name: "*", Namespace: "*"}),
},
addon: addontesting.NewAddonWithConditions("test", "cluster1", manifestAppliedCondition),
existingWork: []runtime.Object{
&v1.ManifestWork{
ObjectMeta: metav1.ObjectMeta{
Name: "addon-test-deploy-01",
Namespace: "cluster1",
Labels: map[string]string{
"open-cluster-management.io/addon-name": "test",
},
},
Spec: v1.ManifestWorkSpec{},
Status: v1.ManifestWorkStatus{
ResourceStatus: v1.ManifestResourceStatus{
Manifests: []v1.ManifestCondition{
{
ResourceMeta: v1.ManifestResourceMeta{
Ordinal: 0,
Group: "apps",
Version: "",
Kind: "",
Resource: "deployments",
Name: "test-deployment0",
Namespace: "default",
},
StatusFeedbacks: v1.StatusFeedbackResult{
Values: []v1.FeedbackValue{
{
Name: "Replicas",
Value: v1.FieldValue{
Integer: boolPtr(1),
},
},
{
Name: "ReadyReplicas",
Value: v1.FieldValue{
Integer: boolPtr(2),
},
},
},
},
},
{
ResourceMeta: v1.ManifestResourceMeta{
Ordinal: 0,
Group: "apps",
Version: "",
Kind: "",
Resource: "deployments",
Name: "test-deployment1",
Namespace: "default",
},
StatusFeedbacks: v1.StatusFeedbackResult{},
},
},
},
Conditions: []metav1.Condition{
{
Type: v1.WorkAvailable,
Status: metav1.ConditionTrue,
},
},
},
},
},
expectedErr: nil,
expectedHealthCheckMode: addonapiv1alpha1.HealthCheckModeCustomized,
expectAvailableCondition: metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionTrue,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeAvailable,
Message: "test add-on is available.",
},
},
{
name: "Health check mode is deployment availability but manifestApplied condition is not true",
testAddon: &healthCheckTestAgent{name: "test",
Expand Down Expand Up @@ -929,3 +1086,34 @@ func TestHealthCheckReconcile(t *testing.T) {
})
}
}

// addonHealthCheckAllFunc is the HealthCheckAll callback used by these tests.
// It returns nil as soon as one result whose resource is "deployments" passes
// utils.DeploymentAvailabilityHealthCheck; results for other resources are
// ignored. If no result passes, it returns an error.
func addonHealthCheckAllFunc(resultFields []agent.ResultField) error {
	for i := range resultFields {
		id := resultFields[i].ResourceIdentifier
		if id.Resource != "deployments" {
			continue
		}
		if utils.DeploymentAvailabilityHealthCheck(id, resultFields[i].FeedbackResult) == nil {
			return nil
		}
	}
	return fmt.Errorf("not meet the results")
}

// newDeploymentsCheckAllProber builds a work-type HealthProber for the given
// deployments. Each deployment contributes one ProbeField whose identifier and
// feedback rules come from utils.DeploymentWellKnowManifestConfig, and the
// prober's HealthCheckAll is set to addonHealthCheckAllFunc.
func newDeploymentsCheckAllProber(deployments ...types.NamespacedName) *agent.HealthProber {
	fields := make([]agent.ProbeField, 0, len(deployments))
	for _, nn := range deployments {
		cfg := utils.DeploymentWellKnowManifestConfig(nn.Namespace, nn.Name)
		fields = append(fields, agent.ProbeField{
			ResourceIdentifier: cfg.ResourceIdentifier,
			ProbeRules:         cfg.FeedbackRules,
		})
	}

	workProber := &agent.WorkHealthProber{
		ProbeFields:    fields,
		HealthCheckAll: addonHealthCheckAllFunc,
	}
	return &agent.HealthProber{
		Type:       agent.HealthProberTypeWork,
		WorkProber: workProber,
	}
}
Loading

0 comments on commit a8d8c1b

Please sign in to comment.