From d7d6c321468e10631e3741a1b24a7e0242359c83 Mon Sep 17 00:00:00 2001 From: Yurii Kondrakov Date: Fri, 13 Dec 2024 14:29:48 -0500 Subject: [PATCH] Fix SmartAttributeWarning alert (#375) * Fix SmartAttributeWarning alert * Fix alert unit test --- src/prometheus_alert_rules/smart.yaml | 4 ++-- tests/unit/test_alert_rules/test_smart.yaml | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/prometheus_alert_rules/smart.yaml b/src/prometheus_alert_rules/smart.yaml index b067fad2..d4a31dfb 100644 --- a/src/prometheus_alert_rules/smart.yaml +++ b/src/prometheus_alert_rules/smart.yaml @@ -88,7 +88,7 @@ groups: - alert: SmartAttributeWarning # based on https://www.backblaze.com/blog/what-smart-stats-indicate-hard-drive-failures/ - expr: smartctl_device_attribute{attribute_id=~"5|187|188|197|198"} > 0 + expr: smartctl_device_attribute{attribute_id=~"5|187|188|197|198", attribute_value_type="raw"} > 0 for: 2m labels: severity: warning @@ -124,4 +124,4 @@ groups: The NVMe drive has reached 90% of its estimated lifetime. Note: A value of 100 does not indicate failure. For more details, visit https://charmhub.io/hardware-observer/docs/metrics-and-alerts-smart VALUE = {{ $value }} - LABELS = {{ $labels }} \ No newline at end of file + LABELS = {{ $labels }} diff --git a/tests/unit/test_alert_rules/test_smart.yaml b/tests/unit/test_alert_rules/test_smart.yaml index 8e07ad12..5030e226 100644 --- a/tests/unit/test_alert_rules/test_smart.yaml +++ b/tests/unit/test_alert_rules/test_smart.yaml @@ -168,7 +168,7 @@ tests: - interval: 1m input_series: - - series: 'smartctl_device_attribute{device="sda", attribute_id="5", attribute_name="Reallocated_Sectors_Count", instance="ubuntu-2"}' + - series: 'smartctl_device_attribute{device="sda", attribute_id="5", attribute_name="Reallocated_Sectors_Count", instance="ubuntu-2", attribute_value_type="raw"}' values: '2x10' alert_rule_test: @@ -181,13 +181,14 @@ tests: device: sda attribute_id: 5 attribute_name: Reallocated_Sectors_Count + attribute_value_type: raw exp_annotations: summary: SMART device attribute correlating with drive failure has its raw value greater than zero. (instance ubuntu-2) description: | SMART raw value for attribute "Reallocated_Sectors_Count" with id "5" on device "sda" is greater than 0. VALUE = 2 - LABELS = map[__name__:smartctl_device_attribute attribute_id:5 attribute_name:Reallocated_Sectors_Count device:sda instance:ubuntu-2] + LABELS = map[__name__:smartctl_device_attribute attribute_id:5 attribute_name:Reallocated_Sectors_Count attribute_value_type:raw device:sda instance:ubuntu-2] - interval: 1m input_series: @@ -230,4 +231,4 @@ tests: The NVMe drive has reached 90% of its estimated lifetime. Note: A value of 100 does not indicate failure. For more details, visit https://charmhub.io/hardware-observer/docs/metrics-and-alerts-smart VALUE = 95 - LABELS = map[__name__:smartctl_device_percentage_used device:nvme instance:ubuntu-4] \ No newline at end of file + LABELS = map[__name__:smartctl_device_percentage_used device:nvme instance:ubuntu-4]