From 18d4c2b51372f9fe1997600a7ff73160aca2401c Mon Sep 17 00:00:00 2001 From: KillianG Date: Tue, 13 Aug 2024 14:28:25 +0200 Subject: [PATCH] Rename service label to kafkaCluster Issue: ZENKO-4857 --- monitoring/kafka/alerts.test.yaml | 24 ++++++++++++------------ monitoring/kafka/alerts.yaml | 16 ++++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/monitoring/kafka/alerts.test.yaml b/monitoring/kafka/alerts.test.yaml index 799585899d..59619689d8 100644 --- a/monitoring/kafka/alerts.test.yaml +++ b/monitoring/kafka/alerts.test.yaml @@ -35,7 +35,7 @@ tests: description: 'Kafka: Broker count is down' exp_labels: severity: warning - service: ${service} + kafkaCluster: ${cluster} - alertname: BrokersCountCritical eval_time: 3m exp_alerts: [] @@ -50,7 +50,7 @@ tests: description: 'Kafka: Broker count is 0' exp_labels: namespace: zenko - service: ${service} + kafkaCluster: ${cluster} severity: critical # ActiveControllerCritical @@ -79,7 +79,7 @@ tests: summary: 'Kafka: No active controller' exp_labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} - alertname: ActiveControllerCritical eval_time: 3m exp_alerts: [] @@ -110,7 +110,7 @@ tests: summary: 'Kafka: 1 under-replicated partitons' exp_labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} - alertname: UnderReplicatedPartitions eval_time: 3m exp_alerts: @@ -122,7 +122,7 @@ tests: summary: 'Kafka: 2 under-replicated partitons' exp_labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} # OfflinePartitons ################################################################################################## @@ -151,7 +151,7 @@ tests: summary: 'Kafka: 1 offline partitons' exp_labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} - alertname: OfflinePartitons eval_time: 3m exp_alerts: @@ -164,7 +164,7 @@ tests: summary: 'Kafka: 2 offline partitons' exp_labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} # RemainingDiskSpaceWarning ################################################################################################## @@ -204,7 +204,7 @@ tests: namespace: zenko persistentvolumeclaim: artesca-data-base-queue-1 severity: warning - service: ${service} + kafkaCluster: ${cluster} - alertname: RemainingDiskSpaceWarning eval_time: 5d8h exp_alerts: [] @@ -232,7 +232,7 @@ tests: summary: Zookeeper Sync Disconected exp_labels: severity: warning - service: ${service} + kafkaCluster: ${cluster} # ConsumerLagWarning ################################################################################################## @@ -282,7 +282,7 @@ tests: cluster_name: artesca-data-base-queue group: notification severity: warning - service: ${service} + kafkaCluster: ${cluster} - alertname: ConsumerLagWarning eval_time: 20m exp_alerts: @@ -299,7 +299,7 @@ tests: cluster_name: artesca-data-base-queue group: replication severity: warning - service: ${service} + kafkaCluster: ${cluster} - exp_annotations: description: | Kafka consumer lag has been more more than 300 seconds @@ -313,4 +313,4 @@ tests: cluster_name: artesca-data-base-queue group: notification severity: warning - service: ${service} + kafkaCluster: ${cluster} diff --git a/monitoring/kafka/alerts.yaml b/monitoring/kafka/alerts.yaml index 0de57b0f85..4deda4506a 100644 --- a/monitoring/kafka/alerts.yaml +++ b/monitoring/kafka/alerts.yaml @@ -34,7 +34,7 @@ groups: for: 1m labels: severity: warning - service: ${service} + kafkaCluster: ${cluster} annotations: summary: 'Not all expected brokers are online.' description: 'Kafka: Broker count is down' @@ -45,7 +45,7 @@ groups: for: 1m labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} annotations: summary: 'No Brokers online' description: 'Kafka: Broker count is 0' @@ -55,7 +55,7 @@ groups: for: 1m labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} annotations: description: >- No broker in the cluster is reporting as the active controller in the last 1 minute interval. During steady state there should @@ -67,7 +67,7 @@ groups: for: 1m labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} annotations: description: >- Under-replicated partitions means that one or more replicas are not available. This is usually because a broker is down. Restart @@ -79,7 +79,7 @@ groups: for: 1m labels: severity: critical - service: ${service} + kafkaCluster: ${cluster} annotations: description: >- After successful leader election, if the leader for partition dies, then the partition moves to the OfflinePartition state. @@ -96,7 +96,7 @@ groups: for: 2m labels: severity: warning - service: ${service} + kafkaCluster: ${cluster} annotations: description: 'Kafka Broker has low disk space' summary: 'Kafka Broker has low disk space' @@ -107,7 +107,7 @@ groups: for: 1m labels: severity: warning - service: ${service} + kafkaCluster: ${cluster} annotations: summary: 'Zookeeper Sync Disconected' description: 'Kafka Zookeeper Sync Disconected' @@ -123,7 +123,7 @@ groups: for: 5m labels: severity: warning - service: ${service} + kafkaCluster: ${cluster} annotations: summary: 'Kafka: consumer lag is too high for {{ $labels.group }}' description: |