Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
honghan-wong committed Jan 8, 2024
1 parent f064828 commit b6d75be
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
12 changes: 11 additions & 1 deletion src/prometheus_alert_rules/general.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ groups:
- name: HardwareObserver
rules:
- alert: CollectorFailed
expr: '{__name__=~"(.*)_collector_failed"} == 1'
expr: '{__name__=~"(.*)_collector_failed",__name__!="ipmiselcollector_collector_failed"} == 1'
for: 0m
labels:
severity: error
Expand All @@ -11,3 +11,13 @@ groups:
description: |
A collector failed to fetch the metrics. Please reach out to hardware-observer maintainers.
LABELS = {{ $labels }}
# Dedicated failure alert for the IPMI SEL collector.
# Matches only the ipmiselcollector_collector_failed metric, which the generic
# CollectorFailed rule explicitly excludes from its *_collector_failed regex.
# Unlike the generic rule (for: 0m), the condition must hold for 1m before the
# alert fires.
- alert: IpmiSelCollectorFailed
expr: '{__name__="ipmiselcollector_collector_failed"} == 1'
for: 1m
labels:
severity: error
annotations:
summary: Ipmi Sel Collector failed. (instance {{ $labels.instance }})
description: |
Ipmi sel collector failed to fetch the metrics. Please reach out to hardware-observer maintainers.
LABELS = {{ $labels }}
14 changes: 14 additions & 0 deletions tests/unit/test_alert_rules/test_general.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ tests:
values: '1x15'
- series: ipmisel_collector_failed{instance="ubuntu-99", collector="ipmisel"}
values: '1x15'
- series: ipmiselcollector_collector_failed{instance="ubuntu-99", collector="IpmiSelCollector"}
values: '1x15'

alert_rule_test:
- eval_time: 0m
Expand All @@ -33,3 +35,15 @@ tests:
description: |
A collector failed to fetch the metrics. Please reach out to hardware-observer maintainers.
LABELS = map[__name__:ipmisel_collector_failed collector:ipmisel instance:ubuntu-99]
# Checks the IpmiSelCollectorFailed alert at the 1m mark: exactly one alert is
# expected, carrying the labels of the ipmiselcollector_collector_failed input
# series (instance ubuntu-99, collector IpmiSelCollector) plus severity=error,
# with the summary/description rendered from those labels.
- eval_time: 1m
alertname: IpmiSelCollectorFailed
exp_alerts:
- exp_labels:
severity: error
instance: ubuntu-99
collector: IpmiSelCollector
exp_annotations:
summary: Ipmi Sel Collector failed. (instance ubuntu-99)
description: |
Ipmi sel collector failed to fetch the metrics. Please reach out to hardware-observer maintainers.
LABELS = map[__name__:ipmiselcollector_collector_failed collector:IpmiSelCollector instance:ubuntu-99]

0 comments on commit b6d75be

Please sign in to comment.