From bcdd15326efc502334bc570beb3b6383bc165135 Mon Sep 17 00:00:00 2001 From: Markus Opolka Date: Mon, 16 Dec 2024 15:11:57 +0100 Subject: [PATCH] Add option to exclude alerts from the results The use case behind this: sometimes you want to define a so-called Watchdog or DeadMansSwitch alert that is always firing, in order to monitor that the alerting is working. When such a Watchdog is defined, the list of all alerts will always be Critical. Thus we add a flag to exclude certain alerts. --- README.md | 13 +++++++------ cmd/alert.go | 21 +++++++++++++++++++++ cmd/alert_test.go | 13 +++++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 56989ff..0c52211 100644 --- a/README.md +++ b/README.md @@ -150,12 +150,13 @@ Examples: | total=2 firing=1 pending=0 inactive=1 Flags: - -h, --help help for alert - -n, --name strings The name of one or more specific alerts to check. - This parameter can be repeated e.G.: '--name alert1 --name alert2' - If no name is given, all alerts will be evaluated - -T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK") - -P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed + --exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex. + -h, --help help for alert + -n, --name strings The name of one or more specific alerts to check. + This parameter can be repeated e.G.: '--name alert1 --name alert2' + If no name is given, all alerts will be evaluated + -T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK") + -P, --problems Display only alerts which status is not inactive/OK. 
Note that in combination with the --name flag this might result in no alerts being displayed ``` #### Checking all defined alerts diff --git a/cmd/alert.go b/cmd/alert.go index 00cd8f7..b72b751 100644 --- a/cmd/alert.go +++ b/cmd/alert.go @@ -3,6 +3,7 @@ package cmd import ( "errors" "fmt" + "regexp" "strings" "github.com/NETWAYS/check_prometheus/internal/alert" @@ -15,6 +16,7 @@ import ( type AlertConfig struct { AlertName []string Group []string + ExcludeAlerts []string ProblemsOnly bool NoAlertsState string } @@ -115,6 +117,11 @@ inactive = 0`, continue } + if matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts) { + // If the alert matches a regex from the list we can skip it. + continue + } + // Handle Inactive Alerts if len(rl.AlertingRule.Alerts) == 0 { // Counting states for perfdata @@ -197,6 +204,8 @@ func init() { fs.StringVarP(&cliAlertConfig.NoAlertsState, "no-alerts-state", "T", "OK", "State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK") + fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{}, "Alerts to ignore. Can be used multiple times and supports regex.") + fs.StringSliceVarP(&cliAlertConfig.AlertName, "name", "n", nil, "The name of one or more specific alerts to check."+ "\nThis parameter can be repeated e.G.: '--name alert1 --name alert2'"+ @@ -222,3 +231,15 @@ func convertStateToInt(state string) (int, error) { return check.Unknown, errors.New("invalid state") } } + +// matches reports whether input matches at least one regular expression in regexToExclude (false for an empty list); every pattern is compiled with regexp.MustCompile on each call, so an invalid pattern panics. 
+func matches(input string, regexToExclude []string) bool { + for _, regex := range regexToExclude { + re := regexp.MustCompile(regex) + if re.MatchString(input) { + return true + } + } + + return false +} diff --git a/cmd/alert_test.go b/cmd/alert_test.go index 049a9ad..6a176c0 100644 --- a/cmd/alert_test.go +++ b/cmd/alert_test.go @@ -95,6 +95,19 @@ exit status 2 \_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 |total=2 firing=1 pending=1 inactive=0 +exit status 2 +`, + }, + { + name: "alert-problems-only-with-exlude", + server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`{"status":"success","data":{"groups":[{"name":"Foo","file":"alerts.yaml","rules":[{"state":"inactive","name":"HostOutOfMemory","query":"up","duration":120,"labels":{"severity":"critical"},"annotations":{"description":"Foo","summary":"Foo"},"alerts":[],"health":"ok","evaluationTime":0.000553928,"lastEvaluation":"2022-11-24T14:08:17.597083058Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000581212,"lastEvaluation":"2022-11-24T14:08:17.59706083Z"},{"name":"SQL","file":"alerts.yaml","rules":[{"state":"pending","name":"SqlAccessDeniedRate","query":"mysql","duration":17280000,"labels":{"severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"alerts":[{"labels":{"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"state":"pending","activeAt":"2022-11-21T10:38:35.373483748Z","value":"4.03448275862069e-01"}],"health":"ok","evaluationTime":0.002909617,"lastEvaluation":"2022-11-24T14:08:25.375220595Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.003046259,"lastEvaluation":"2022-11-24T14:08:25.375096825Z"},{"name":"TLS","file":"alerts.yaml","rules":[{"state":"firing","name":"BlackboxTLS","query":"SSL","du
ration":0,"labels":{"severity":"critical"},"annotations":{"description":"TLS","summary":"TLS"},"alerts":[{"labels":{"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"},"annotations":{"description":"TLS","summary":"TLS"},"state":"firing","activeAt":"2022-11-24T05:11:27.211699259Z","value":"-6.065338210999966e+06"}],"health":"ok","evaluationTime":0.000713955,"lastEvaluation":"2022-11-24T14:08:17.212720815Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000738927,"lastEvaluation":"2022-11-24T14:08:17.212700182Z"}]}}`)) + })), + args: []string{"run", "../main.go", "alert", "--problems", "--exclude-alert", "Sql.*DeniedRate"}, + expected: `[CRITICAL] - 1 Alerts: 1 Firing - 0 Pending - 0 Inactive +\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00 + exit status 2 `, },