Skip to content

Commit

Permalink
add new addonHealthCheck func to check all fields and support wildcards
Browse files Browse the repository at this point in the history
Signed-off-by: Zhiwei Yin <[email protected]>
  • Loading branch information
zhiweiyin318 committed Nov 25, 2024
1 parent d787e98 commit 12371fb
Show file tree
Hide file tree
Showing 14 changed files with 604 additions and 42 deletions.
2 changes: 1 addition & 1 deletion cmd/example/helloworld_helm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func runController(ctx context.Context, kubeConfig *rest.Config) error {
utils.AgentInstallNamespaceFromDeploymentConfigFunc(
utils.NewAddOnDeploymentConfigGetter(addonClient),
),
).
).WithAgentHealthProber(helloworld_helm.AgentHealthProber()).
BuildHelmAgentAddon()
if err != nil {
klog.Errorf("failed to build agent %v", err)
Expand Down
41 changes: 41 additions & 0 deletions examples/helloworld_helm/helloworld_helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"context"
"embed"
"fmt"
"open-cluster-management.io/addon-framework/pkg/agent"
workapiv1 "open-cluster-management.io/api/work/v1"
"os"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -112,3 +114,42 @@ func GetImageValues(kubeClient kubernetes.Interface) addonfactory.GetValuesFunc
return overrideValues, nil
}
}

// AgentHealthProber returns a work-type health prober for the helloworldhelm
// addon.
//
// It probes every deployment in the "apps" group (name and namespace are both
// the "*" wildcard) using the well-known status feedback rule, and evaluates
// all collected results together through HealthCheckAll.
func AgentHealthProber() *agent.HealthProber {
	return &agent.HealthProber{
		Type: agent.HealthProberTypeWork,
		WorkProber: &agent.WorkHealthProber{
			ProbeFields: []agent.ProbeField{
				{
					ResourceIdentifier: workapiv1.ResourceIdentifier{
						Group:     "apps",
						Resource:  "deployments",
						Name:      "*",
						Namespace: "*",
					},
					ProbeRules: []workapiv1.FeedbackRule{
						{
							Type: workapiv1.WellKnownStatusType,
						},
					},
				},
			},
			// HealthCheckAll returns nil once the helloworldhelm-agent
			// deployment reports at least one available replica, and an error
			// otherwise.
			HealthCheckAll: func(fields []agent.ResultField) error {
				for _, field := range fields {
					if len(field.FeedbackResult.Values) == 0 {
						return fmt.Errorf("no helloworldhelmhm agent")
					}
					// Only the agent deployment decides addon health; other
					// deployments matched by the wildcard are skipped.
					if field.ResourceIdentifier.Name != "helloworldhelm-agent" {
						continue
					}
					for _, value := range field.FeedbackResult.Values {
						if value.Name != "AvailableReplicas" {
							continue
						}
						// Guard the pointer: a feedback value without an
						// integer payload must not panic the controller.
						if value.Value.Integer != nil && *value.Value.Integer >= 1 {
							return nil
						}
					}
				}
				return fmt.Errorf("helloworldhelmhm agent is not ready")
			},
		},
	}
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ require (
k8s.io/component-base v0.30.2
k8s.io/klog/v2 v2.120.1
k8s.io/utils v0.0.0-20240310230437-4693a0247e57
open-cluster-management.io/api v0.15.0
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874
open-cluster-management.io/sdk-go v0.15.0
sigs.k8s.io/controller-runtime v0.18.4
)
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -474,8 +474,8 @@ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7F
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
k8s.io/utils v0.0.0-20240310230437-4693a0247e57 h1:gbqbevonBh57eILzModw6mrkbwM0gQBEuevE/AaBsHY=
k8s.io/utils v0.0.0-20240310230437-4693a0247e57/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
open-cluster-management.io/api v0.15.0 h1:lRee1KOlGHZb2scTA7ff9E9Fxt2hJc7jpkHnaCbvkOU=
open-cluster-management.io/api v0.15.0/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874 h1:WgkuYXTbJV7EK+qtiMq3soa21faGUKeTG5w0C8Mn1Ok=
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
open-cluster-management.io/sdk-go v0.15.0 h1:2IAJnPfUoY6rPC5w7LhqAnvIlgekPoVW03LdZO1unIM=
open-cluster-management.io/sdk-go v0.15.0/go.mod h1:fi5WBsbC5K3txKb8eRLuP0Sim/Oqz/PHX18skAEyjiA=
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 h1:/U5vjBbQn3RChhv7P11uhYvCSm5G2GaIi5AIGBS6r4c=
Expand Down
126 changes: 91 additions & 35 deletions pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package agentdeploy
import (
"context"
"fmt"
"regexp"
"strings"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -167,17 +168,38 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
manifestConditions = append(manifestConditions, work.Status.ResourceStatus.Manifests...)
}

probeFields, healthChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
probeFields, healthChecker, healthAllChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
if err != nil {
// should not happen, return
return err
}

var resultFields []agent.ResultField

for _, field := range probeFields {
result := findResultByIdentifier(field.ResourceIdentifier, manifestConditions)
results := findResultsByIdentifier(field.ResourceIdentifier, manifestConditions)

// healthChecker will be ignored if healthAllChecker is set
if healthAllChecker != nil {
if len(results) != 0 {
resultFields = append(resultFields, results...)
}
continue
}

if healthChecker == nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("health checker function is not set %v", err),
})
return nil
}

// if no results are returned. it is possible that work agent has not returned the feedback value.
// mark condition to unknown
if result == nil {
if len(results) == 0 {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionUnknown,
Expand All @@ -189,16 +211,29 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
return nil
}

err := healthChecker(field.ResourceIdentifier, *result)
if err != nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
})
return nil
for _, result := range results {
err := healthChecker(result.ResourceIdentifier, result.FeedbackResult)
if err != nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
})
return nil
}
}

}

if healthAllChecker != nil && healthAllChecker(resultFields) != nil {
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
Status: metav1.ConditionFalse,
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
})
return nil
}

meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Expand All @@ -214,21 +249,23 @@ func (s *healthCheckSyncer) analyzeWorkProber(
agentAddon agent.AgentAddon,
cluster *clusterv1.ManagedCluster,
addon *addonapiv1alpha1.ManagedClusterAddOn,
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, error) {
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, agent.AddonHealthCheckAllFunc, error) {

switch agentAddon.GetAgentAddonOptions().HealthProber.Type {
case agent.HealthProberTypeWork:
workProber := agentAddon.GetAgentAddonOptions().HealthProber.WorkProber
if workProber != nil {
return workProber.ProbeFields, workProber.HealthCheck, nil
return workProber.ProbeFields, workProber.HealthCheck, workProber.HealthCheckAll, nil
}
return nil, nil, fmt.Errorf("work prober is not configured")
return nil, nil, nil, fmt.Errorf("work prober is not configured")
case agent.HealthProberTypeDeploymentAvailability:
return s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
probeFields, heathChecker, err := s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
return probeFields, heathChecker, nil, err
case agent.HealthProberTypeWorkloadAvailability:
return s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
probeFields, heathChecker, err := s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
return probeFields, heathChecker, nil, err
default:
return nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
return nil, nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
}
}

Expand Down Expand Up @@ -294,27 +331,46 @@ func (s *healthCheckSyncer) analyzeWorkloadsWorkProber(
return probeFields, utils.WorkloadAvailabilityHealthCheck, nil
}

func findResultByIdentifier(identifier workapiv1.ResourceIdentifier, manifestConditions []workapiv1.ManifestCondition) *workapiv1.StatusFeedbackResult {
func findResultsByIdentifier(identifier workapiv1.ResourceIdentifier,
manifestConditions []workapiv1.ManifestCondition) []agent.ResultField {
var results []agent.ResultField
for _, status := range manifestConditions {
if identifier.Group != status.ResourceMeta.Group {
continue
}
if identifier.Resource != status.ResourceMeta.Resource {
continue
}
if identifier.Name != status.ResourceMeta.Name {
continue
}
if identifier.Namespace != status.ResourceMeta.Namespace {
continue
if resourceMatch(status.ResourceMeta, identifier) && len(status.StatusFeedbacks.Values) != 0 {
results = append(results, agent.ResultField{
ResourceIdentifier: workapiv1.ResourceIdentifier{
Group: status.ResourceMeta.Group,
Resource: status.ResourceMeta.Resource,
Name: status.ResourceMeta.Name,
Namespace: status.ResourceMeta.Namespace,
},
FeedbackResult: status.StatusFeedbacks,
})
}
}

if len(status.StatusFeedbacks.Values) == 0 {
return nil
}
return results
}

return &status.StatusFeedbacks
// wildcardMatch reports whether resource matches target, where every "*" in
// target stands for any (possibly empty) run of characters. All other
// characters in target are compared literally.
func wildcardMatch(resource, target string) bool {
	// Fast path: exact match, or a bare "*" that accepts everything.
	if resource == target || target == "*" {
		return true
	}

	// Without a wildcard the only possible match is equality, which was
	// already ruled out above; skip building a regexp for it.
	if !strings.Contains(target, "*") {
		return false
	}

	// Escape regexp metacharacters in target, then turn each escaped "*" back
	// into ".*" so it is the only wildcard in the anchored pattern.
	pattern := "^" + regexp.QuoteMeta(target) + "$"
	pattern = strings.ReplaceAll(pattern, "\\*", ".*")

	re, err := regexp.Compile(pattern)
	if err != nil {
		return false
	}

	return re.MatchString(resource)
}

// resourceMatch reports whether the manifest described by resourceMeta is
// selected by the resource identifier. Group and Resource must be equal;
// Namespace and Name are compared with wildcardMatch, so the identifier may
// use "*" in those two fields.
func resourceMatch(resourceMeta workapiv1.ManifestResourceMeta, resource workapiv1.ResourceIdentifier) bool {
	if resourceMeta.Group != resource.Group {
		return false
	}
	if resourceMeta.Resource != resource.Resource {
		return false
	}
	if !wildcardMatch(resourceMeta.Namespace, resource.Namespace) {
		return false
	}
	return wildcardMatch(resourceMeta.Name, resource.Name)
}
Loading

0 comments on commit 12371fb

Please sign in to comment.