Add NVMe drive lifetime alerts, with promtool unit tests.

Two rules on smartctl_device_percentage_used{device=~"nvme.*"}:
  SmartNVMeDriveLifetimeWarning   fires at >= 80 held for 15m (severity: warning)
  SmartNVMeDriveLifetimeCritical  fires at >= 90 held for 15m (severity: critical)
Both touched files now end with a trailing newline.

NOTE(review): line breaks and leading whitespace in this patch were
reconstructed from a whitespace-collapsed copy; confirm the context lines
against the target files before applying. The post-image blob ids on the
"index" lines predate the trailing-newline fix.

diff --git a/src/prometheus_alert_rules/smart.yaml b/src/prometheus_alert_rules/smart.yaml
index 13f46f23..b067fad2 100644
--- a/src/prometheus_alert_rules/smart.yaml
+++ b/src/prometheus_alert_rules/smart.yaml
@@ -99,3 +99,29 @@ groups:
             on device "{{ $labels.device }}" is greater than 0.
               VALUE = {{ $value }}
               LABELS = {{ $labels }}
+
+      - alert: SmartNVMeDriveLifetimeWarning
+        expr: smartctl_device_percentage_used{device=~"nvme.*"} >= 80
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: NVMe drive is approaching its estimated lifetime (instance {{ $labels.instance }})
+          description: |
+            The NVMe drive has reached 80% of its estimated lifetime.
+            Note: A value of 100 does not indicate failure. For more details, visit https://charmhub.io/hardware-observer/docs/metrics-and-alerts-smart
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+
+      - alert: SmartNVMeDriveLifetimeCritical
+        expr: smartctl_device_percentage_used{device=~"nvme.*"} >= 90
+        for: 15m
+        labels:
+          severity: critical
+        annotations:
+          summary: NVMe drive is close to reaching its estimated lifetime (instance {{ $labels.instance }})
+          description: |
+            The NVMe drive has reached 90% of its estimated lifetime.
+            Note: A value of 100 does not indicate failure. For more details, visit https://charmhub.io/hardware-observer/docs/metrics-and-alerts-smart
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
diff --git a/tests/unit/test_alert_rules/test_smart.yaml b/tests/unit/test_alert_rules/test_smart.yaml
index 65ec30b0..8e07ad12 100644
--- a/tests/unit/test_alert_rules/test_smart.yaml
+++ b/tests/unit/test_alert_rules/test_smart.yaml
@@ -188,3 +188,46 @@ tests:
                 on device "sda" is greater than 0.
                   VALUE = 2
                   LABELS = map[__name__:smartctl_device_attribute attribute_id:5 attribute_name:Reallocated_Sectors_Count device:sda instance:ubuntu-2]
+
+  - interval: 1m
+    input_series:
+      - series: 'smartctl_device_percentage_used{device="nvme", instance="ubuntu-3"}'
+        values: '85x20'
+
+    alert_rule_test:
+      - eval_time: 20m
+        alertname: SmartNVMeDriveLifetimeWarning
+        exp_alerts:
+          - exp_labels:
+              severity: warning
+              instance: ubuntu-3
+              device: nvme
+            exp_annotations:
+              summary: NVMe drive is approaching its estimated lifetime (instance ubuntu-3)
+              description: |
+                The NVMe drive has reached 80% of its estimated lifetime.
+                Note: A value of 100 does not indicate failure. For more details, visit https://charmhub.io/hardware-observer/docs/metrics-and-alerts-smart
+                  VALUE = 85
+                  LABELS = map[__name__:smartctl_device_percentage_used device:nvme instance:ubuntu-3]
+
+
+  - interval: 1m
+    input_series:
+      - series: 'smartctl_device_percentage_used{device="nvme", instance="ubuntu-4"}'
+        values: '95x20'
+
+    alert_rule_test:
+      - eval_time: 20m
+        alertname: SmartNVMeDriveLifetimeCritical
+        exp_alerts:
+          - exp_labels:
+              severity: critical
+              instance: ubuntu-4
+              device: nvme
+            exp_annotations:
+              summary: NVMe drive is close to reaching its estimated lifetime (instance ubuntu-4)
+              description: |
+                The NVMe drive has reached 90% of its estimated lifetime.
+                Note: A value of 100 does not indicate failure. For more details, visit https://charmhub.io/hardware-observer/docs/metrics-and-alerts-smart
+                  VALUE = 95
+                  LABELS = map[__name__:smartctl_device_percentage_used device:nvme instance:ubuntu-4]