From 91128dccc2adb62187666451517a37880cfdbc92 Mon Sep 17 00:00:00 2001
From: jneo8
Date: Thu, 23 Nov 2023 19:24:46 +0800
Subject: [PATCH] feat(alert): Add collector fail alert

Add a CollectorFailed alert rule that fires without a pending period
(`for: 0m`) whenever a metric whose name matches `(.*)_collector_failed`
has the value 1. Add a rule unit test that feeds two such series and
expects one warning alert per series.

---
 src/prometheus_alert_rules/general.yaml       | 13 +++++++
 tests/unit/test_alert_rules/test_general.yaml | 35 +++++++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 src/prometheus_alert_rules/general.yaml
 create mode 100644 tests/unit/test_alert_rules/test_general.yaml

diff --git a/src/prometheus_alert_rules/general.yaml b/src/prometheus_alert_rules/general.yaml
new file mode 100644
index 00000000..78df8b7c
--- /dev/null
+++ b/src/prometheus_alert_rules/general.yaml
@@ -0,0 +1,13 @@
+groups:
+- name: HardwareObserver
+  rules:
+    - alert: CollectorFailed
+      expr: '{__name__=~"(.*)_collector_failed"} == 1'
+      for: 0m
+      labels:
+        severity: warning
+      annotations:
+        summary: Collector fetch failed. (instance {{ $labels.instance }})
+        description: |
+          Collector fetch failed. Please reach out to hardware-observer maintainers.
+            LABELS = {{ $labels }}
diff --git a/tests/unit/test_alert_rules/test_general.yaml b/tests/unit/test_alert_rules/test_general.yaml
new file mode 100644
index 00000000..61c10a3e
--- /dev/null
+++ b/tests/unit/test_alert_rules/test_general.yaml
@@ -0,0 +1,35 @@
+rule_files:
+  - ../../../src/prometheus_alert_rules/general.yaml
+
+evaluation_interval: 1m
+
+tests:
+  - interval: 1m
+    input_series:
+      - series: ipmidcmicollector_collector_failed{instance="ubuntu-99", collector="IPMIDCMICollector"}
+        values: '1x15'
+      - series: ipmiselcollector_collector_failed{instance="ubuntu-99", collector="IPMISELCollector"}
+        values: '1x15'
+
+    alert_rule_test:
+      - eval_time: 0m
+        alertname: CollectorFailed
+        exp_alerts:
+          - exp_labels:
+              severity: warning
+              instance: ubuntu-99
+              collector: IPMIDCMICollector
+            exp_annotations:
+              summary: Collector fetch failed. (instance ubuntu-99)
+              description: |
+                Collector fetch failed. Please reach out to hardware-observer maintainers.
+                  LABELS = map[__name__:ipmidcmicollector_collector_failed collector:IPMIDCMICollector instance:ubuntu-99]
+          - exp_labels:
+              severity: warning
+              instance: ubuntu-99
+              collector: IPMISELCollector
+            exp_annotations:
+              summary: Collector fetch failed. (instance ubuntu-99)
+              description: |
+                Collector fetch failed. Please reach out to hardware-observer maintainers.
+                  LABELS = map[__name__:ipmiselcollector_collector_failed collector:IPMISELCollector instance:ubuntu-99]