Skip to content

Commit

Permalink
fix(alerts): Increase duration before redfish alerts get triggered
Browse files Browse the repository at this point in the history
Increase the duration for which a condition must hold before redfish alerts
get triggered. This prevents false alerts during occasional flapping.
  • Loading branch information
dashmage committed Mar 21, 2024
1 parent 7523866 commit 70e6cdd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
18 changes: 9 additions & 9 deletions src/prometheus_alert_rules/redfish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ groups:
rules:
- alert: RedfishCallFailed
expr: redfish_call_success == 0
for: 0m
for: 5m
labels:
severity: warning
annotations:
Expand All @@ -15,7 +15,7 @@ groups:
- alert: RedfishServiceUnavailable
expr: redfish_service_available == 0
for: 0m
for: 5m
labels:
severity: warning
annotations:
Expand All @@ -27,7 +27,7 @@ groups:
- alert: RedfishSensorHealthNotOk
expr: redfish_sensor_info{health!~"OK|N/A"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -39,7 +39,7 @@ groups:
- alert: RedfishProcessorHealthNotOk
expr: redfish_processor_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -50,7 +50,7 @@ groups:
- alert: RedfishStorageControllerHealthNotOk
expr: redfish_storage_controller_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -61,7 +61,7 @@ groups:
- alert: RedfishChassisHealthNotOk
expr: redfish_chassis_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -72,7 +72,7 @@ groups:
- alert: RedfishStorageDriveHealthNotOk
expr: redfish_storage_drive_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -83,7 +83,7 @@ groups:
- alert: RedfishMemoryDimmHealthNotOk
expr: redfish_memory_dimm_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -94,7 +94,7 @@ groups:
- alert: RedfishSmartStorageHealthNotOk
expr: redfish_smart_storage_health == 0
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand Down
18 changes: 9 additions & 9 deletions tests/unit/test_alert_rules/test_redfish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ tests:
values: "0x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishCallFailed
exp_alerts:
- exp_labels:
Expand All @@ -29,7 +29,7 @@ tests:
values: "0x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishServiceUnavailable
exp_alerts:
- exp_labels:
Expand All @@ -48,7 +48,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishSensorHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -69,7 +69,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishProcessorHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -91,7 +91,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishStorageControllerHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -113,7 +113,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishChassisHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -134,7 +134,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishStorageDriveHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -156,7 +156,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishMemoryDimmHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -177,7 +177,7 @@ tests:
values: "0x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishSmartStorageHealthNotOk
exp_alerts:
- exp_labels:
Expand Down

0 comments on commit 70e6cdd

Please sign in to comment.