From ec0fa220009dc9dc4a30310f6e1514d8bfc87b83 Mon Sep 17 00:00:00 2001 From: Maha Benzekri Date: Tue, 20 Aug 2024 17:28:57 +0200 Subject: [PATCH] fix --- monitoring/pra/alerts.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/monitoring/pra/alerts.yaml b/monitoring/pra/alerts.yaml index f3e3c7f2e4..3c05c65098 100644 --- a/monitoring/pra/alerts.yaml +++ b/monitoring/pra/alerts.yaml @@ -27,9 +27,9 @@ groups: - alert: DrResourcePausedWhileOtherRunning expr: | - (sum(up{job="${kafka_connect_src_job}", namespace="${namespace}"}) or vector(0)) + (sum(up{drSinkInstancejob="",job="${kafka_connect_src_job}", namespace="${namespace}"}) or vector(0)) != - (sum(up{job="${kafka_connect_sink_job}", namespace="${namespace}"}) or vector(0)) + (sum(up{drSinkInstancejob="${dr_sink_instance}", job="${kafka_connect_sink_job}", namespace="${namespace}"}) or vector(0)) for: 1m labels: severity: warning @@ -39,7 +39,7 @@ groups: - alert: DrResourcePausedForTooLong expr: | - (sum(up{job="${kafka_connect_src_job}", namespace="${namespace}"}) or vector(0)) == 0 + (sum(up{drSinkInstancejob="", job="${kafka_connect_src_job}", namespace="${namespace}"}) or vector(0)) == 0 for: 5m labels: severity: warning @@ -49,9 +49,9 @@ groups: - alert: KafkaConnectOutageSource expr: | - sum(rate(kafka_connect_task_error_total_record_errors{job="${kafka_connect_src_job}"}[$__rate_interval])) > 0 + sum(rate(kafka_connect_task_error_total_record_errors{drSinkInstancejob="", job="${kafka_connect_src_job}"}[$__rate_interval])) > 0 or - sum(rate(kafka_connect_task_error_total_record_failures{job="${kafka_connect_src_job}"}[$__rate_interval])) > 0 + sum(rate(kafka_connect_task_error_total_record_failures{drSinkInstancejob="", job="${kafka_connect_src_job}"}[$__rate_interval])) > 0 for: 1m labels: severity: critical @@ -62,9 +62,9 @@ groups: - alert: KafkaConnectOutageSink expr: | - sum(rate(kafka_connect_task_error_total_record_errors{job="${kafka_connect_sink_job}"}[$__rate_interval])) > 0 + sum(rate(kafka_connect_task_error_total_record_errors{drSinkInstancejob="${dr_sink_instance}", job="${kafka_connect_sink_job}"}[$__rate_interval])) > 0 or - sum(rate(kafka_connect_task_error_total_record_failures{job="${kafka_connect_sink_job}"}[$__rate_interval])) > 0 + sum(rate(kafka_connect_task_error_total_record_failures{drSinkInstancejob="${dr_sink_instance}", job="${kafka_connect_sink_job}"}[$__rate_interval])) > 0 for: 1m labels: severity: critical