Skip to content

Commit

Permalink
Add option to exclude alerts from the results
Browse files Browse the repository at this point in the history
The use case behind this: sometimes you want to define
a so-called Watchdog or DeadMansSwitch alert that is always
firing, in order to monitor that the alerting pipeline is working.

When such a Watchdog is defined, a check over the list of all alerts
will always be Critical. Thus we add a flag to exclude certain alerts.
  • Loading branch information
martialblog committed Dec 18, 2024
1 parent 67fb855 commit fa2e5bd
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 6 deletions.
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,13 @@ Examples:
| total=2 firing=1 pending=0 inactive=1
Flags:
-h, --help help for alert
-n, --name strings The name of one or more specific alerts to check.
This parameter can be repeated e.G.: '--name alert1 --name alert2'
If no name is given, all alerts will be evaluated
-T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK")
-P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed
--exclude-alert stringArray Alerts to ignore. Can be used multiple times and supports regex.
-h, --help help for alert
-n, --name strings The name of one or more specific alerts to check.
This parameter can be repeated e.G.: '--name alert1 --name alert2'
If no name is given, all alerts will be evaluated
-T, --no-alerts-state string State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK (default "OK")
-P, --problems Display only alerts which status is not inactive/OK. Note that in combination with the --name flag this might result in no alerts being displayed
```
#### Checking all defined alerts
Expand Down
32 changes: 32 additions & 0 deletions cmd/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"errors"
"fmt"
"regexp"
"strings"

"github.com/NETWAYS/check_prometheus/internal/alert"
Expand All @@ -15,6 +16,7 @@ import (
// AlertConfig holds the command line options of the alert subcommand.
type AlertConfig struct {
// AlertName is filled from the repeatable --name/-n flag: the names of
// the specific alerts to check.
AlertName []string
// Group: NOTE(review): the flag binding is not visible in this chunk;
// presumably restricts the check to certain rule groups — confirm.
Group []string
// ExcludeAlerts is filled from the repeatable --exclude-alert flag. Each
// entry is treated as a regular expression and matched against the
// alerting rule name; rules that match are skipped.
ExcludeAlerts []string
// ProblemsOnly: NOTE(review): presumably set by the --problems/-P flag
// listed in the README; the binding is not visible here — confirm.
ProblemsOnly bool
// NoAlertsState is filled from the --no-alerts-state/-T flag and
// defaults to "OK".
NoAlertsState string
}
Expand Down Expand Up @@ -115,6 +117,17 @@ inactive = 0`,
continue
}

alertMatched, regexErr := matches(rl.AlertingRule.Name, cliAlertConfig.ExcludeAlerts)

if regexErr != nil {
check.ExitRaw(check.Unknown, "Invalid regular expression provided:", regexErr.Error())
}

if alertMatched {
// If the alert matches a regex from the list we can skip it.
continue
}

// Handle Inactive Alerts
if len(rl.AlertingRule.Alerts) == 0 {
// Counting states for perfdata
Expand Down Expand Up @@ -197,6 +210,8 @@ func init() {

fs.StringVarP(&cliAlertConfig.NoAlertsState, "no-alerts-state", "T", "OK", "State to assign when no alerts are found (0, 1, 2, 3, OK, WARNING, CRITICAL, UNKNOWN). If not set this defaults to OK")

fs.StringArrayVar(&cliAlertConfig.ExcludeAlerts, "exclude-alert", []string{}, "Alerts to ignore. Can be used multiple times and supports regex.")

fs.StringSliceVarP(&cliAlertConfig.AlertName, "name", "n", nil,
"The name of one or more specific alerts to check."+
"\nThis parameter can be repeated e.G.: '--name alert1 --name alert2'"+
Expand All @@ -222,3 +237,20 @@ func convertStateToInt(state string) (int, error) {
return check.Unknown, errors.New("invalid state")
}
}

// matches reports whether input matches at least one of the regular
// expressions in regexToExclude.
//
// Every pattern is compiled before a result is returned, so an invalid
// pattern is always reported, regardless of its position in the list or of
// whether an earlier pattern already matched input. Previously an early match
// hid invalid patterns further down the list, so the same arguments could
// succeed for one alert name and fail for another.
//
// It returns (false, nil) for an empty or nil list. On an invalid pattern it
// returns false together with the unmodified error from regexp.Compile.
func matches(input string, regexToExclude []string) (bool, error) {
	matched := false

	for _, pattern := range regexToExclude {
		re, err := regexp.Compile(pattern)
		if err != nil {
			return false, err
		}

		// Keep iterating after a hit so the remaining patterns are still
		// validated; only the matching itself is short-circuited.
		if !matched && re.MatchString(input) {
			matched = true
		}
	}

	return matched, nil
}
22 changes: 22 additions & 0 deletions cmd/alert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,28 @@ exit status 2
exit status 2
`,
},
{
name: "alert-problems-only-with-exlude",
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"status":"success","data":{"groups":[{"name":"Foo","file":"alerts.yaml","rules":[{"state":"inactive","name":"HostOutOfMemory","query":"up","duration":120,"labels":{"severity":"critical"},"annotations":{"description":"Foo","summary":"Foo"},"alerts":[],"health":"ok","evaluationTime":0.000553928,"lastEvaluation":"2022-11-24T14:08:17.597083058Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000581212,"lastEvaluation":"2022-11-24T14:08:17.59706083Z"},{"name":"SQL","file":"alerts.yaml","rules":[{"state":"pending","name":"SqlAccessDeniedRate","query":"mysql","duration":17280000,"labels":{"severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"alerts":[{"labels":{"alertname":"SqlAccessDeniedRate","instance":"localhost","job":"mysql","severity":"warning"},"annotations":{"description":"MySQL","summary":"MySQL"},"state":"pending","activeAt":"2022-11-21T10:38:35.373483748Z","value":"4.03448275862069e-01"}],"health":"ok","evaluationTime":0.002909617,"lastEvaluation":"2022-11-24T14:08:25.375220595Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.003046259,"lastEvaluation":"2022-11-24T14:08:25.375096825Z"},{"name":"TLS","file":"alerts.yaml","rules":[{"state":"firing","name":"BlackboxTLS","query":"SSL","duration":0,"labels":{"severity":"critical"},"annotations":{"description":"TLS","summary":"TLS"},"alerts":[{"labels":{"alertname":"TLS","instance":"https://localhost:443","job":"blackbox","severity":"critical"},"annotations":{"description":"TLS","summary":"TLS"},"state":"firing","activeAt":"2022-11-24T05:11:27.211699259Z","value":"-6.065338210999966e+06"}],"health":"ok","evaluationTime":0.000713955,"lastEvaluation":"2022-11-24T14:08:17.212720815Z","type":"alerting"}],"interval":10,"limit":0,"evaluationTime":0.000738927,"lastEvaluation":"2022-11-24T14:08:17.212700182Z"}]}}`))
})),
args: []string{"run", "../main.go", "alert", "--problems", "--exclude-alert", "Sql.*DeniedRate"},
expected: `[CRITICAL] - 1 Alerts: 1 Firing - 0 Pending - 0 Inactive
\_ [CRITICAL] [BlackboxTLS] - Job: [blackbox] on Instance: [https://localhost:443] is firing - value: -6065338.00
exit status 2
`,
},
{
name: "alert-with-exclude-error",
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write([]byte(`{"status":"success","data":{"groups":[{"name":"k8s","file":"/etc/prometheus/rules/al.yaml","rules":[{"state":"inactive","name":"NodeHasMemoryPressure","query":"kube_node{condition=\"MemoryPressure\",status=\"true\"} == 1","duration":300,"keepFiringFor":0,"labels":{},"annotations":{"summary":"Memory pressure on instance {{ $labels.instance }}"},"alerts":[],"health":"ok","evaluationTime":0.00023339,"lastEvaluation":"2024-12-18T17:50:01.483161228Z","type":"alerting"}],"interval":15,"limit":0,"evaluationTime":0.000262616,"lastEvaluation":"2024-12-18T17:50:01.483135426Z"},{"name":"example","file":"/etc/prometheus/rules/rec.yaml","rules":[{"name":"rule:prometheus_http_requests_total:sum","query":"sum by (code) (rate(prometheus_http_requests_total[5m]))","health":"ok","evaluationTime":0.000472562,"lastEvaluation":"2024-12-18T17:50:12.420737469Z","type":"recording"}],"interval":15,"limit":0,"evaluationTime":0.000497618,"lastEvaluation":"2024-12-18T17:50:12.42071533Z"}],"groupNextToken:omitempty":""}}`))
})),
args: []string{"run", "../main.go", "alert", "--exclude-alert", "[a-z"},
expected: "[UNKNOWN] - Invalid regular expression provided: error parsing regexp: missing closing ]: `[a-z`\nexit status 3\n",
},
{
name: "alert-no-such-alert",
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down

0 comments on commit fa2e5bd

Please sign in to comment.