Skip to content

Commit

Permalink
fix(alerts): Increase duration before redfish alerts get triggered (#194
Browse files Browse the repository at this point in the history
)

Increase the duration before redfish alerts get triggered. This
prevents false alerts during occasional flapping.
  • Loading branch information
dashmage authored Mar 22, 2024
1 parent 1ddf60c commit 86f0d7e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
18 changes: 9 additions & 9 deletions src/prometheus_alert_rules/redfish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ groups:
rules:
- alert: RedfishCallFailed
expr: redfish_call_success == 0
for: 0m
for: 5m
labels:
severity: warning
annotations:
Expand All @@ -15,7 +15,7 @@ groups:
- alert: RedfishServiceUnavailable
expr: redfish_service_available == 0
for: 0m
for: 5m
labels:
severity: warning
annotations:
Expand All @@ -27,7 +27,7 @@ groups:
- alert: RedfishSensorHealthNotOk
expr: redfish_sensor_info{health!~"OK|N/A"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -39,7 +39,7 @@ groups:
- alert: RedfishProcessorHealthNotOk
expr: redfish_processor_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -50,7 +50,7 @@ groups:
- alert: RedfishStorageControllerHealthNotOk
expr: redfish_storage_controller_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -61,7 +61,7 @@ groups:
- alert: RedfishChassisHealthNotOk
expr: redfish_chassis_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -72,7 +72,7 @@ groups:
- alert: RedfishStorageDriveHealthNotOk
expr: redfish_storage_drive_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -83,7 +83,7 @@ groups:
- alert: RedfishMemoryDimmHealthNotOk
expr: redfish_memory_dimm_info{health!~"OK|NA"}
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand All @@ -94,7 +94,7 @@ groups:
- alert: RedfishSmartStorageHealthNotOk
expr: redfish_smart_storage_health == 0
for: 0m
for: 5m
labels:
severity: critical
annotations:
Expand Down
18 changes: 9 additions & 9 deletions tests/unit/test_alert_rules/test_redfish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ tests:
values: "0x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishCallFailed
exp_alerts:
- exp_labels:
Expand All @@ -29,7 +29,7 @@ tests:
values: "0x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishServiceUnavailable
exp_alerts:
- exp_labels:
Expand All @@ -48,7 +48,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishSensorHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -69,7 +69,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishProcessorHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -91,7 +91,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishStorageControllerHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -113,7 +113,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishChassisHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -134,7 +134,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishStorageDriveHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -156,7 +156,7 @@ tests:
values: "1x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishMemoryDimmHealthNotOk
exp_alerts:
- exp_labels:
Expand All @@ -177,7 +177,7 @@ tests:
values: "0x15"

alert_rule_test:
- eval_time: 0m
- eval_time: 10m
alertname: RedfishSmartStorageHealthNotOk
exp_alerts:
- exp_labels:
Expand Down

0 comments on commit 86f0d7e

Please sign in to comment.