From fa2e5bdc10ed5fc242f1e9dfd39b36b59e2a0060 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Mon, 16 Dec 2024 15:11:57 +0100 Subject: [PATCH] Add option to exclude alerts from the results The use case behind this: sometimes you want to define a so-called Watchdog or DeadMansSwitch alert that is always firing, in order to monitor that the alerting is working. When such a Watchdog is defined, the list of all alerts will always be Critical. Thus we add a flag to exclude certain alerts. --- README.md | 13 +++++++------ cmd/alert.go | 32 ++++++++++++++++++++++++++++++++ cmd/alert_test.go | 22 ++++++++++++++++++++++ 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 56989ff..0c52211 100644 --- a/README.md +++ b/README.md @@ -150,12 +150,13 @@ Examples: | total=2 firing=1 pending=0 inactive=1 Flags: - -h, --help help for alert - -n, --name strings The name of one or more specific alerts to check. - This parameter can be repeated e.G.: '--name alert1 --name alert2' - If no name is given, all alerts will be evaluated - -T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK") - -P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed + --exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex. + -h, --help help for alert + -n, --name strings The name of one or more specific alerts to check. + This parameter can be repeated e.G.: '--name alert1 --name alert2' + If no name is given, all alerts will be evaluated + -T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK") + -P, --problems Display only alerts which status is not inactive/OK. 
Note that in combination with the --name flag this might result in no alerts being displayed ``` #### Checking all defined alerts diff --git a/cmd/alert.go b/cmd/alert.go index 00cd8f7..31a02d1 100644 --- a/cmd/alert.go +++ b/cmd/alert.go @@ -3,6 +3,7 @@ package cmd import ( "errors" "fmt" + "regexp" "strings" "github.com/NETWAYS/check_prometheus/internal/alert" @@ -15,6 +16,7 @@ import ( type AlertConfig struct { AlertName []string Group []string + ExcludeAlerts []string ProblemsOnly bool NoAlertsState string } @@ -115,6 +117,17 @@ inactive = 0`, continue } + alertMatched, regexErr := matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts) + + if regexErr != nil { + check.ExitRaw(check.Unknown, "Invalid regular expression provided:", regexErr.Error()) + } + + if alertMatched { + // If the alert matches a regex from the list we can skip it. + continue + } + // Handle Inactive Alerts if len(rl.AlertingRule.Alerts) == 0 { // Counting states for perfdata @@ -197,6 +210,8 @@ func init() { fs.StringVarP(&cliAlertConfig.NoAlertsState, "no-alerts-state", "T", "OK", "State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK") + fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{}, "Alerts to ignore. Can be used multiple times and supports regex.") + fs.StringSliceVarP(&cliAlertConfig.AlertName, "name", "n", nil, "The name of one or more specific alerts to check."+ "\nThis parameter can be repeated e.G.: '--name alert1 --name alert2'"+ @@ -222,3 +237,20 @@ func convertStateToInt(state string) (int, error) { return check.Unknown, errors.New("invalid state") } } + +// Matches a list of regular expressions against a string. 
+func matches(input string, regexToExclude []string) (bool, error) { + for _, regex := range regexToExclude { + re, err := regexp.Compile(regex) + + if err != nil { + return false, err + } + + if re.MatchString(input) { + return true, nil + } + } + + return false, nil +} diff --git a/cmd/alert_test.go b/cmd/alert_test.go index 049a9ad..e585a40 100644 --- a/cmd/alert_test.go +++ b/cmd/alert_test.go @@ -98,6 +98,28 @@ exit status 2 exit status 2 `, }, + { + name: "alert-problems-only-with-exlude", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"status":"success","data":{"groups":[{"name":"Foo","file":"alerts.yaml","rules":[{"state":"inactive","name":"HostOutOfMemory","query":"up","duration":120,"labels":{"severity":"critical"},"annotations":{"description":"Foo","summary":"Foo"},"alerts":[],"health":"ok","evaluationTime":0.000553928,"lastEvaluation":"2022-11-24T14:08:17.597083058Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000581212,"lastEvaluation":"2022-11-24T14:08:17.59706083Z"},{"name":"SQL","file":"alerts.yaml","rules":[{"state":"pending","name":"SqlAccessDeniedRate","query":"mysql","duration":17280000,"labels":{"severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"alerts":[{"labels":{"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"state":"pending","activeAt":"2022-11-21T10:38:35.373483748Z","value":"4.03448275862069e-01"}],"health":"ok","evaluationTime":0.002909617,"lastEvaluation":"2022-11-24T14:08:25.375220595Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.003046259,"lastEvaluation":"2022-11-24T14:08:25.375096825Z"},{"name":"TLS","file":"alerts.yaml","rules":[{"state":"firing","name":"BlackboxTLS","query":"SSL","duration":0,"labels":{"severity":"critical"},"annotations":{"description":"TLS","summary":"TL
S"},"alerts":[{"labels":{"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"},"annotations":{"description":"TLS","summary":"TLS"},"state":"firing","activeAt":"2022-11-24T05:11:27.211699259Z","value":"-6.065338210999966e+06"}],"health":"ok","evaluationTime":0.000713955,"lastEvaluation":"2022-11-24T14:08:17.212720815Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000738927,"lastEvaluation":"2022-11-24T14:08:17.212700182Z"}]}}`)) + })), + args: []string{"run", "../main.go", "alert", "--problems", "--exclude-alert", "Sql.*DeniedRate"}, + expected: `[CRITICAL] - 1 Alerts: 1 Firing - 0 Pending - 0 Inactive +\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 + +exit status 2 +`, + }, + { + name: "alert-with-exclude-error", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"status":"success","data":{"groups":[{"name":"k8s","file":"/etc/prometheus/rules/al.yaml","rules":[{"state":"inactive","name":"NodeHasMemoryPressure","query":"kube_node{condition=\"MemoryPressure\",status=\"true\"} == 1","duration":300,"keepFiringFor":0,"labels":{},"annotations":{"summary":"Memory pressure on instance {{ $labels.instance }}"},"alerts":[],"health":"ok","evaluationTime":0.00023339,"lastEvaluation":"2024-12-18T17:50:01.483161228Z","type":"alerting"}],"interval":15,"limit":0,"evaluationTime":0.000262616,"lastEvaluation":"2024-12-18T17:50:01.483135426Z"},{"name":"example","file":"/etc/prometheus/rules/rec.yaml","rules":[{"name":"rule:prometheus_http_requests_total:sum","query":"sum by (code) (rate(prometheus_http_requests_total[5m]))","health":"ok","evaluationTime":0.000472562,"lastEvaluation":"2024-12-18T17:50:12.420737469Z","type":"recording"}],"interval":15,"limit":0,"evaluationTime":0.000497618,"lastEvaluation":"2024-12-18T17:50:12.42071533Z"}],"groupNextToken:omitempty":""}}`)) 
+ })), + args: []string{"run", "../main.go", "alert", "--exclude-alert", "[a-z"}, + expected: "[UNKNOWN] - Invalid regular expression provided: error parsing regexp: missing closing ]: `[a-z`\nexit status 3\n", + }, { name: "alert-no-such-alert", server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {