Skip to content

Commit

Permalink
feat(alert): Add collector fail alert (canonical#117)
Browse files Browse the repository at this point in the history
* feat(alert): Add collector fail alert

* fix: typo and wording fix

* fix: Update severity level to critical

* fix: change alert severity to error
  • Loading branch information
jneo8 authored Nov 29, 2023
1 parent b425728 commit 5b041b5
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/prometheus_alert_rules/general.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Prometheus alerting rules shared by every hardware-observer collector.
groups:
  - name: HardwareObserver
    rules:
      # Matches any series whose metric name ends in "_collector_failed"
      # (Prometheus regex matchers are fully anchored) and whose current
      # value is 1, so one rule covers every collector's failure metric.
      - alert: CollectorFailed
        expr: '{__name__=~"(.*)_collector_failed"} == 1'
        # No pending period: the alert fires on the first evaluation in
        # which the expression returns a sample.
        for: 0m
        labels:
          severity: error
        annotations:
          summary: Collector failed. (instance {{ $labels.instance }})
          # Literal block with clip chomping: the rendered text ends with
          # exactly one newline, which the unit test expectations rely on.
          description: |
            A collector failed to fetch the metrics. Please reach out to hardware-observer maintainers.
            LABELS = {{ $labels }}
35 changes: 35 additions & 0 deletions tests/unit/test_alert_rules/test_general.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# promtool unit test for the CollectorFailed alert.
# The rule file path is resolved relative to this test file's directory.
rule_files:
  - ../../../src/prometheus_alert_rules/general.yaml

evaluation_interval: 1m

tests:
  - interval: 1m
    # Two different collectors report failure (constant value 1) on the same
    # instance; the rule must raise one alert per failing collector.
    input_series:
      - series: 'ipmidcmicollector_collector_failed{instance="ubuntu-99", collector="IPMIDCMICollector"}'
        values: '1x15'
      - series: 'ipmiselcollector_collector_failed{instance="ubuntu-99", collector="IPMISELCollector"}'
        values: '1x15'

    alert_rule_test:
      # The rule uses "for: 0m", so both alerts are expected to be firing
      # already at the very first evaluation.
      - eval_time: 0m
        alertname: CollectorFailed
        exp_alerts:
          - exp_labels:
              severity: error
              instance: ubuntu-99
              collector: IPMIDCMICollector
            exp_annotations:
              summary: Collector failed. (instance ubuntu-99)
              # Must match the rendered rule annotation byte-for-byte,
              # including the single trailing newline kept by "|".
              description: |
                A collector failed to fetch the metrics. Please reach out to hardware-observer maintainers.
                LABELS = map[__name__:ipmidcmicollector_collector_failed collector:IPMIDCMICollector instance:ubuntu-99]
          - exp_labels:
              severity: error
              instance: ubuntu-99
              collector: IPMISELCollector
            exp_annotations:
              summary: Collector failed. (instance ubuntu-99)
              description: |
                A collector failed to fetch the metrics. Please reach out to hardware-observer maintainers.
                LABELS = map[__name__:ipmiselcollector_collector_failed collector:IPMISELCollector instance:ubuntu-99]

0 comments on commit 5b041b5

Please sign in to comment.