linstor resource labels and prometheus rules added
Signed-off-by: Denis Shipkov <[email protected]>
grem-li committed Nov 22, 2024
1 parent afc6053 commit d25e062
Showing 2 changed files with 210 additions and 24 deletions.
@@ -39,6 +39,7 @@ const (
linstorResourcesWatcherCtrlName = "linstor-resources-watcher-controller"
missMatchedLabel = "storage.deckhouse.io/linstor-settings-mismatch"
unableToSetQuorumMinimumRedundancyLabel = "storage.deckhouse.io/unable-to-set-quorum-minimum-redundancy"
pvNotEnoughReplicasLabel = "storage.deckhouse.io/pv-not-enough-replicas"
PVCSIDriver = "replicated.csi.storage.deckhouse.io"
replicasOnSameRGKey = "replicas_on_same"
replicasOnDifferentRGKey = "replicas_on_different"
@@ -66,6 +67,7 @@ var (
badLabels = []string{missMatchedLabel, unableToSetQuorumMinimumRedundancyLabel}
)


func NewLinstorResourcesWatcher(
mgr manager.Manager,
lc *lapi.Client,
@@ -112,8 +114,23 @@ func NewLinstorResourcesWatcher(
rgMap[rg.Name] = rg
}

ReconcileParams(ctx, log, cl, lc, scMap, rdMap, rgMap)
ReconcileTieBreaker(ctx, log, lc, rdMap, rgMap)
pvsList, err := GetListPV(ctx, cl)
if err != nil {
log.Error(err, "[NewLinstorResourcesWatcher] unable to get Persistent Volumes")
}

resMap := make(map[string][]lapi.Resource, len(rdMap))
for name := range rdMap {
res, err := lc.Resources.GetAll(ctx, name)
if err != nil {
log.Error(err, fmt.Sprintf("[NewLinstorResourcesWatcher] unable to get Linstor Resources, name: %s", name))
}
resMap[name] = res
}

ReconcileParams(ctx, log, cl, lc, scMap, rdMap, rgMap, pvsList)
ReconcileTieBreaker(ctx, log, lc, rdMap, rgMap, resMap)
ReconcilePVReplicas(ctx, log, cl, lc, rdMap, rgMap, resMap, pvsList)

log.Info("[NewLinstorResourcesWatcher] ends reconcile")
}
@@ -128,12 +145,9 @@ func ReconcileParams(
scs map[string]v1.StorageClass,
rds map[string]lapi.ResourceDefinitionWithVolumeDefinition,
rgs map[string]lapi.ResourceGroup,
pvs []core.PersistentVolume,
) {
log.Info("[ReconcileParams] starts work")
pvs, err := GetListPV(ctx, cl)
if err != nil {
log.Error(err, "[ReconcileParams] unable to get Persistent Volumes")
}

for _, pv := range pvs {
if pv.Spec.CSI != nil && pv.Spec.CSI.Driver == PVCSIDriver {
@@ -152,7 +166,7 @@ if slices.Contains(missMatched, quorumMinimumRedundancyWithoutPrefixKey) && sc.Parameters[QuorumMinimumRedundancyWithPrefixSCKey] != "" {
if slices.Contains(missMatched, quorumMinimumRedundancyWithoutPrefixKey) && sc.Parameters[QuorumMinimumRedundancyWithPrefixSCKey] != "" {
log.Info(fmt.Sprintf("[ReconcileParams] the quorum-minimum-redundancy value is set in the Storage Class %s, value: %s, but it is not match the Resource Group %s value %s", sc.Name, sc.Parameters[QuorumMinimumRedundancyWithPrefixSCKey], rg.Name, rg.Props[quorumMinimumRedundancyWithPrefixRGKey]))
log.Info(fmt.Sprintf("[ReconcileParams] the quorum-minimum-redundancy value will be set to the Resource Group %s, value: %s", rg.Name, sc.Parameters[QuorumMinimumRedundancyWithPrefixSCKey]))
err = setQuorumMinimumRedundancy(ctx, lc, sc.Parameters[QuorumMinimumRedundancyWithPrefixSCKey], rg.Name)
err := setQuorumMinimumRedundancy(ctx, lc, sc.Parameters[QuorumMinimumRedundancyWithPrefixSCKey], rg.Name)

if err != nil {
log.Error(err, fmt.Sprintf("[ReconcileParams] unable to set the quorum-minimum-redundancy value, name: %s", pv.Name))
@@ -181,7 +195,7 @@

if updated {
pv.Labels = newLabels
err = UpdatePV(ctx, cl, &pv)
err := UpdatePV(ctx, cl, &pv)
if err != nil {
log.Error(err, fmt.Sprintf("[ReconcileParams] unable to update the PV, name: %s", pv.Name))
}
@@ -194,7 +208,7 @@

if updated {
pv.Labels = newLabels
err = UpdatePV(ctx, cl, &pv)
err := UpdatePV(ctx, cl, &pv)
if err != nil {
log.Error(err, fmt.Sprintf("[ReconcileParams] unable to update the PV, name: %s", pv.Name))
}
@@ -206,30 +220,102 @@
log.Info("[ReconcileParams] ends work")
}

func ReconcileTieBreaker(
func ReconcilePVReplicas(
ctx context.Context,
log logger.Logger,
cl client.Client,
lc *lapi.Client,
rds map[string]lapi.ResourceDefinitionWithVolumeDefinition,
rgs map[string]lapi.ResourceGroup,
res map[string][]lapi.Resource,
pvs []core.PersistentVolume,
) {
log.Info("[ReconcileTieBreaker] starts work")
log.Info("[ReconcilePVReplicas] starts work")

for _, pv := range pvs {
if pv.Spec.CSI != nil && pv.Spec.CSI.Driver == PVCSIDriver {
RGName := rds[pv.Name].ResourceGroupName
rg := rgs[RGName]
log.Debug(fmt.Sprintf("[ReconcilePVReplicas] PV: %s, RG: %s", pv.Name, rg.Name))

replicasErrLevel := checkPVMinReplicasCount(ctx, log, lc, rg, res[pv.Name])

if pv.Labels == nil {
pv.Labels = make(map[string]string)
}

allResources := make(map[string][]lapi.Resource, len(rds)*3)
for name := range rds {
res, err := lc.Resources.GetAll(ctx, name)
origLabelVal, exists := pv.Labels[pvNotEnoughReplicasLabel]
log.Debug(fmt.Sprintf("[ReconcilePVReplicas] Update label \"%s\", old: \"%s\", new: \"%s\"", pvNotEnoughReplicasLabel, origLabelVal, replicasErrLevel))

upd := false
if replicasErrLevel == "" && exists {
delete(pv.Labels, pvNotEnoughReplicasLabel)
upd = true
}
if replicasErrLevel != "" && replicasErrLevel != origLabelVal {
pv.Labels[pvNotEnoughReplicasLabel] = replicasErrLevel
upd = true
}

if upd {
err := UpdatePV(ctx, cl, &pv)
if err != nil {
log.Error(err, fmt.Sprintf("[ReconcilePVReplicas] unable to update the PV, name: %s", pv.Name))
}
}
}
}

log.Info("[ReconcilePVReplicas] ends work")
}

func checkPVMinReplicasCount(ctx context.Context, log logger.Logger, lc *lapi.Client, rg lapi.ResourceGroup, resList []lapi.Resource) string {
placeCount := int(rg.SelectFilter.PlaceCount)
upVols := 0

if placeCount <= 0 {
return ""
}

for _, r := range resList {
volList, err := lc.Resources.GetVolumes(ctx, r.Name, r.NodeName)
if err != nil {
log.Error(err, fmt.Sprintf("[ReconcileTieBreaker] unable to get Linstor Resources by the Resource Definition, name: %s", name))
log.Error(err, fmt.Sprintf("[checkPVMinReplicasCount] unable to get Linstor Resources Volumes, name: %s, node: %s", r.Name, r.NodeName))
}

for _, v := range volList {
if v.State.DiskState == "UpToDate" {
upVols += 1
}
}
}

allResources[name] = res
if upVols >= placeCount {
return ""
} else if upVols <= 1 {
return "fatal"
} else if (upVols*100)/placeCount <= 50 {
return "error"
} else {
return "warning"
}
}

func ReconcileTieBreaker(
ctx context.Context,
log logger.Logger,
lc *lapi.Client,
rds map[string]lapi.ResourceDefinitionWithVolumeDefinition,
rgs map[string]lapi.ResourceGroup,
res map[string][]lapi.Resource,
) {
log.Info("[ReconcileTieBreaker] starts work")

var (
nodes []lapi.Node
err error
)
for name, resources := range allResources {
for name, resources := range res {
if len(resources) == 0 {
log.Warning(fmt.Sprintf("[ReconcileTieBreaker] no actual Linstor Resources for the Resource Definition, name: %s", name))
continue
@@ -300,7 +386,9 @@ func getNodeForTieBreaker(
for _, node := range unusedNodes {
log.Trace(fmt.Sprintf("[getNodeForTieBreaker] resource %s does not use a node %s", resources[0].Name, node.Name))
}
rg := getResourceGroupByResource(resources[0].Name, rds, rgs)

RGName := rds[resources[0].Name].ResourceGroupName
rg := rgs[RGName]

if key, exist := rg.Props[replicasOnSameRGKey]; exist {
unusedNodes = filterNodesByReplicasOnSame(unusedNodes, key)
@@ -389,10 +477,6 @@ func filterNodesByReplicasOnSame(nodes []lapi.Node, key string) []lapi.Node {
return filtered
}

func getResourceGroupByResource(resourceName string, rds map[string]lapi.ResourceDefinitionWithVolumeDefinition, rgs map[string]lapi.ResourceGroup) lapi.ResourceGroup {
return rgs[rds[resourceName].ResourceGroupName]
}

func filterOutUsedNodes(nodes []lapi.Node, resources []lapi.Resource) []lapi.Node {
unusedNodes := make([]lapi.Node, 0, len(nodes))
resNodes := make(map[string]struct{}, len(resources))
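For reference, the replica-health classification added in checkPVMinReplicasCount above can be summarized by the following standalone sketch. The helper name classifyReplicaHealth and the sample values are illustrative only, not part of the commit; the thresholds mirror the diff (no label when all expected replicas are UpToDate, "fatal" at one or fewer healthy replicas, "error" at half or less of the expected count, "warning" otherwise).

```go
package main

import "fmt"

// classifyReplicaHealth mirrors the threshold logic of checkPVMinReplicasCount:
// an empty string means enough UpToDate replicas, otherwise the returned label
// value escalates from "warning" to "error" to "fatal".
func classifyReplicaHealth(upVols, placeCount int) string {
	if placeCount <= 0 || upVols >= placeCount {
		return "" // no expectation set, or all expected replicas are UpToDate
	}
	switch {
	case upVols <= 1:
		return "fatal" // at most one healthy replica left
	case (upVols*100)/placeCount <= 50:
		return "error" // half or less of the expected replicas are healthy
	default:
		return "warning" // degraded, but more than half are healthy
	}
}

func main() {
	// Hypothetical sample values, not taken from a real cluster.
	for _, c := range []struct{ up, place int }{{3, 3}, {2, 3}, {1, 3}, {2, 4}} {
		fmt.Printf("upVols=%d placeCount=%d -> %q\n", c.up, c.place, classifyReplicaHealth(c.up, c.place))
	}
}
```

The resulting label value is what the Prometheus rules in the next file match on via label_storage_deckhouse_io_pv_not_enough_replicas.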
106 changes: 104 additions & 2 deletions monitoring/prometheus-rules/replicated-pv-with-incorrect-settings.yaml
@@ -13,11 +13,11 @@
plk_grouped_by__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
summary: Replicated PVs have incorrect settings
description: |
There are persistent volumes in the cluster that were created before migration to ReplicatedStorageClass.
There are persistent volumes in the cluster that were created before migration to ReplicatedStorageClass.
You can recreate such a PV, or add the label storage.deckhouse.io/linstor-settings-mismatch-ignore=true to the PV to ignore it.
Please note that in the future, when transitioning from LINSTOR to a new controller, the settings for all such PVs will be automatically modified to match the current StorageClass settings.
You can view all such PVs with the command
You can view all such PVs with the command
`kubectl get pv -l storage.deckhouse.io/linstor-settings-mismatch=true,storage.deckhouse.io/linstor-settings-mismatch-ignore!=true`
Also, you can add label for all incorrect PVs
@@ -38,3 +38,105 @@
There are persistent volumes in the cluster that have an incorrect quorum-minimum-redundancy setting.
Please contact tech support for assistance.
- alert: ReplicatedPVIncorrectReplicasCountFatalS4
expr: count(kube_persistentvolume_labels{label_storage_deckhouse_io_pv_not_enough_replicas=~"fatal"}) > 0
for: 15m
labels:
severity_level: "4"
tier: cluster
annotations:
plk_markup_format: "markdown"
plk_protocol_version: "1"
plk_create_group_if_not_exists__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
plk_grouped_by__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
summary: Replicated PVs do not have enough replicas
description: |
Some persistent volumes in the cluster have fewer than 2 UpToDate replicas.
You can get the minimum replica count for a StorageClass with the command `kubectl get sc -o yaml | grep -E "(\sname|placementCount)"`
and view all resource states with `linstor r l`.
- alert: ReplicatedPVIncorrectReplicasCountErrorS4
expr: count(kube_persistentvolume_labels{label_storage_deckhouse_io_pv_not_enough_replicas=~"error|fatal"}) > 0
for: 30m
labels:
severity_level: "4"
tier: cluster
annotations:
plk_markup_format: "markdown"
plk_protocol_version: "1"
plk_create_group_if_not_exists__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
plk_grouped_by__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
summary: Replicated PVs do not have enough replicas
description: |
Some persistent volumes in the cluster have not had enough UpToDate replicas for quorum for 30 minutes.
You can get the minimum replica count for a StorageClass with the command `kubectl get sc -o yaml | grep -E "(\sname|placementCount)"`
and view all resource states with `linstor r l`.
- alert: ReplicatedPVIncorrectReplicasCountWarningS4
expr: count(kube_persistentvolume_labels{label_storage_deckhouse_io_pv_not_enough_replicas=~"warning|error|fatal"}) > 0
for: 24h
labels:
severity_level: "4"
tier: cluster
annotations:
plk_markup_format: "markdown"
plk_protocol_version: "1"
plk_create_group_if_not_exists__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
plk_grouped_by__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
summary: Replicated PVs do not have enough replicas
description: |
Some persistent volumes in the cluster have had fewer UpToDate replicas than the required minimum for a long time.
You can get the minimum replica count for a StorageClass with the command `kubectl get sc -o yaml | grep -E "(\sname|placementCount)"`
and view all resource states with `linstor r l`.
- alert: ReplicatedPVIncorrectReplicasCountErrorS5
expr: count(kube_persistentvolume_labels{label_storage_deckhouse_io_pv_not_enough_replicas=~"error|fatal"}) > 0
for: 15m
labels:
severity_level: "5"
tier: cluster
annotations:
plk_markup_format: "markdown"
plk_protocol_version: "1"
plk_create_group_if_not_exists__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
plk_grouped_by__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
summary: Replicated PVs do not have enough replicas
description: |
Some persistent volumes in the cluster have not had enough UpToDate replicas for quorum for 15 minutes.
You can get the minimum replica count for a StorageClass with the command `kubectl get sc -o yaml | grep -E "(\sname|placementCount)"`
and view all resource states with `linstor r l`.
- alert: ReplicatedPVIncorrectReplicasCountWarningS5
expr: count(kube_persistentvolume_labels{label_storage_deckhouse_io_pv_not_enough_replicas=~"warning|error|fatal"}) > 0
for: 30m
labels:
severity_level: "5"
tier: cluster
annotations:
plk_markup_format: "markdown"
plk_protocol_version: "1"
plk_create_group_if_not_exists__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
plk_grouped_by__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
summary: Replicated PVs do not have enough replicas
description: |
Some persistent volumes in the cluster have had fewer UpToDate replicas than the required minimum for 30 minutes.
You can get the minimum replica count for a StorageClass with the command `kubectl get sc -o yaml | grep -E "(\sname|placementCount)"`
and view all resource states with `linstor r l`.
- alert: ReplicatedPVIncorrectReplicasCountWarningS6
expr: count(kube_persistentvolume_labels{label_storage_deckhouse_io_pv_not_enough_replicas=~"warning|error|fatal"}) > 0
for: 15m
labels:
severity_level: "6"
tier: cluster
annotations:
plk_markup_format: "markdown"
plk_protocol_version: "1"
plk_create_group_if_not_exists__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
plk_grouped_by__d8_drbd_device_health: "ReplicatedPVSettingsCheck,tier=~tier,prometheus=deckhouse,kubernetes=~kubernetes"
summary: Replicated PVs do not have enough replicas
description: |
Some persistent volumes in the cluster have fewer UpToDate replicas than the required minimum.
You can get the minimum replica count for a StorageClass with the command `kubectl get sc -o yaml | grep -E "(\sname|placementCount)"`
and view all resource states with `linstor r l`.
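As a usage note rather than part of the commit: the PVs flagged by these alerts can presumably be listed via the label set by the controller change above, for example `kubectl get pv -l storage.deckhouse.io/pv-not-enough-replicas`, or narrowed to a single level with `kubectl get pv -l storage.deckhouse.io/pv-not-enough-replicas=fatal`. The label key comes from pvNotEnoughReplicasLabel in the Go diff; the exact output depends on the cluster.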
