From 86f0d7e0dd8b0339740518b27eb224fb1fcaf2b9 Mon Sep 17 00:00:00 2001 From: Ashley James Date: Fri, 22 Mar 2024 15:43:15 +0530 Subject: [PATCH] fix(alerts): Increase duration before redfish alerts get triggered (#194) Increase the duration for which a condition must persist before redfish alerts get triggered. This prevents false alerts during occasional flapping. --- src/prometheus_alert_rules/redfish.yaml | 18 +++++++++--------- tests/unit/test_alert_rules/test_redfish.yaml | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/prometheus_alert_rules/redfish.yaml b/src/prometheus_alert_rules/redfish.yaml index bfdc5fff..3ca30b01 100644 --- a/src/prometheus_alert_rules/redfish.yaml +++ b/src/prometheus_alert_rules/redfish.yaml @@ -3,7 +3,7 @@ groups: rules: - alert: RedfishCallFailed expr: redfish_call_success == 0 - for: 0m + for: 5m labels: severity: warning annotations: @@ -15,7 +15,7 @@ groups: - alert: RedfishServiceUnavailable expr: redfish_service_available == 0 - for: 0m + for: 5m labels: severity: warning annotations: @@ -27,7 +27,7 @@ groups: - alert: RedfishSensorHealthNotOk expr: redfish_sensor_info{health!~"OK|N/A"} - for: 0m + for: 5m labels: severity: critical annotations: @@ -39,7 +39,7 @@ groups: - alert: RedfishProcessorHealthNotOk expr: redfish_processor_info{health!~"OK|NA"} - for: 0m + for: 5m labels: severity: critical annotations: @@ -50,7 +50,7 @@ groups: - alert: RedfishStorageControllerHealthNotOk expr: redfish_storage_controller_info{health!~"OK|NA"} - for: 0m + for: 5m labels: severity: critical annotations: @@ -61,7 +61,7 @@ groups: - alert: RedfishChassisHealthNotOk expr: redfish_chassis_info{health!~"OK|NA"} - for: 0m + for: 5m labels: severity: critical annotations: @@ -72,7 +72,7 @@ groups: - alert: RedfishStorageDriveHealthNotOk expr: redfish_storage_drive_info{health!~"OK|NA"} - for: 0m + for: 5m labels: severity: critical annotations: @@ -83,7 +83,7 @@ groups: - alert: RedfishMemoryDimmHealthNotOk expr: 
redfish_memory_dimm_info{health!~"OK|NA"} - for: 0m + for: 5m labels: severity: critical annotations: @@ -94,7 +94,7 @@ groups: - alert: RedfishSmartStorageHealthNotOk expr: redfish_smart_storage_health == 0 - for: 0m + for: 5m labels: severity: critical annotations: diff --git a/tests/unit/test_alert_rules/test_redfish.yaml b/tests/unit/test_alert_rules/test_redfish.yaml index fef3efa0..a10bd2ab 100644 --- a/tests/unit/test_alert_rules/test_redfish.yaml +++ b/tests/unit/test_alert_rules/test_redfish.yaml @@ -10,7 +10,7 @@ tests: values: "0x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishCallFailed exp_alerts: - exp_labels: @@ -29,7 +29,7 @@ tests: values: "0x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishServiceUnavailable exp_alerts: - exp_labels: @@ -48,7 +48,7 @@ tests: values: "1x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishSensorHealthNotOk exp_alerts: - exp_labels: @@ -69,7 +69,7 @@ tests: values: "1x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishProcessorHealthNotOk exp_alerts: - exp_labels: @@ -91,7 +91,7 @@ tests: values: "1x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishStorageControllerHealthNotOk exp_alerts: - exp_labels: @@ -113,7 +113,7 @@ tests: values: "1x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishChassisHealthNotOk exp_alerts: - exp_labels: @@ -134,7 +134,7 @@ tests: values: "1x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishStorageDriveHealthNotOk exp_alerts: - exp_labels: @@ -156,7 +156,7 @@ tests: values: "1x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishMemoryDimmHealthNotOk exp_alerts: - exp_labels: @@ -177,7 +177,7 @@ tests: values: "0x15" alert_rule_test: - - eval_time: 0m + - eval_time: 10m alertname: RedfishSmartStorageHealthNotOk exp_alerts: - exp_labels: