Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reject recording rules with incompatible fields #2000

Merged
merged 10 commits into from
Feb 4, 2025
6 changes: 3 additions & 3 deletions docs/resources/rule_group.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,18 @@ EOT

Required:

- `condition` (String) The `ref_id` of the query node in the `data` field to use as the alert condition.
- `data` (Block List, Min: 1) A sequence of stages that describe the contents of the rule. (see [below for nested schema](#nestedblock--rule--data))
- `name` (String) The name of the alert rule.

Optional:

- `annotations` (Map of String) Key-value pairs of metadata to attach to the alert rule. They add additional information, such as a `summary` or `runbook_url`, to help identify and investigate alerts. The `dashboardUId` and `panelId` annotations, which link alerts to a panel, must be set together. Defaults to `map[]`.
- `exec_err_state` (String) Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to `Alerting`.
- `condition` (String) The `ref_id` of the query node in the `data` field to use as the alert condition.
- `exec_err_state` (String) Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to Alerting if not set.
- `for` (String) The amount of time for which the rule must be breached for the rule to be considered to be Firing. Before this time has elapsed, the rule is only considered to be Pending. Defaults to `0`.
- `is_paused` (Boolean) Sets whether the alert should be paused or not. Defaults to `false`.
- `labels` (Map of String) Key-value pairs to attach to the alert rule that can be used in matching, grouping, and routing. Defaults to `map[]`.
- `no_data_state` (String) Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to `NoData`.
- `no_data_state` (String) Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to NoData if not set.
- `notification_settings` (Block List, Max: 1) Notification settings for the rule. If specified, it overrides the notification policies. Available since Grafana 10.4, requires feature flag 'alertingSimplifiedRouting' to be enabled. (see [below for nested schema](#nestedblock--rule--notification_settings))
- `record` (Block List, Max: 1) Settings for a recording rule. Available since Grafana 11.2, requires feature flag 'grafanaManagedRecordingRules' to be enabled. (see [below for nested schema](#nestedblock--rule--record))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,29 +118,29 @@ func TestAccMuteTiming_RemoveInUse(t *testing.T) {
locals {
use_mute = %t
}

resource "grafana_organization" "my_org" {
name = "mute-timing-test"
}

resource "grafana_contact_point" "default_policy" {
org_id = grafana_organization.my_org.id
name = "default-policy"
email {
addresses = ["[email protected]"]
}
}

resource "grafana_notification_policy" "org_policy" {
org_id = grafana_organization.my_org.id
group_by = ["..."]
group_wait = "45s"
group_interval = "6m"
repeat_interval = "3h"
contact_point = grafana_contact_point.default_policy.name

policy {
mute_timings = local.use_mute ? [grafana_mute_timing.test[0].name] : []
mute_timings = local.use_mute ? [grafana_mute_timing.test[0].name] : []
contact_point = grafana_contact_point.default_policy.name
}
}
Expand All @@ -167,7 +167,7 @@ func TestAccMuteTiming_RemoveInUse(t *testing.T) {
}

func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
testutils.CheckCloudInstanceTestsEnabled(t) // TODO: Switch to OSS when this is released: https://github.com/grafana/grafana/pull/90500
testutils.CheckOSSTestsEnabled(t, ">=11.2.0")

randomStr := acctest.RandString(6)

Expand All @@ -176,11 +176,11 @@ func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
locals {
use_mute = %[2]t
}

resource "grafana_folder" "rule_folder" {
title = "%[1]s"
}

resource "grafana_contact_point" "default_policy" {
name = "%[1]s"
email {
Expand Down Expand Up @@ -219,7 +219,7 @@ func TestAccMuteTiming_RemoveInUseInAlertRule(t *testing.T) {
}
}


resource "grafana_mute_timing" "test" {
count = local.use_mute ? 1 : 0
name = "%[1]s"
Expand Down
64 changes: 56 additions & 8 deletions internal/resources/grafana/resource_alerting_rule_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,30 @@ This resource requires Grafana 9.1.0 or later.
"no_data_state": {
Type: schema.TypeString,
Optional: true,
Default: "NoData",
Description: "Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting.",
Description: "Describes what state to enter when the rule's query returns No Data. Options are OK, NoData, KeepLast, and Alerting. Defaults to NoData if not set.",
DiffSuppressFunc: func(k, oldValue, newValue string, d *schema.ResourceData) bool {
// We default to this value later in the pipeline, so we need to account for that here.
if newValue == "" {
return oldValue == "NoData"
}
return oldValue == newValue
},
},
"exec_err_state": {
Type: schema.TypeString,
Optional: true,
Default: "Alerting",
Description: "Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting.",
Description: "Describes what state to enter when the rule's query is invalid and the rule cannot be executed. Options are OK, Error, KeepLast, and Alerting. Defaults to Alerting if not set.",
DiffSuppressFunc: func(k, oldValue, newValue string, d *schema.ResourceData) bool {
// We default to this value later in the pipeline, so we need to account for that here.
if newValue == "" {
return oldValue == "Alerting"
}
return oldValue == newValue
},
},
"condition": {
Type: schema.TypeString,
Required: true,
Optional: true,
Description: "The `ref_id` of the query node in the `data` field to use as the alert condition.",
},
"data": {
Expand Down Expand Up @@ -527,17 +539,53 @@ func unpackAlertRule(raw interface{}, groupName string, folderUID string, orgID
return nil, err
}

// Check for conflicting fields before unpacking the rest of the rule.
// This is a workaround due to the lack of support for ConflictsWith in Lists in the SDK.
errState := json["exec_err_state"].(string)
noDataState := json["no_data_state"].(string)
condition := json["condition"].(string)

record := unpackRecord(json["record"])
if record != nil {
incompatFieldMsgFmt := `conflicting fields "record" and "%s"`
if forDuration != 0 {
return nil, fmt.Errorf(incompatFieldMsgFmt, "for")
}
if noDataState != "" {
return nil, fmt.Errorf(incompatFieldMsgFmt, "no_data_state")
}
if errState != "" {
return nil, fmt.Errorf(incompatFieldMsgFmt, "exec_err_state")
}
if condition != "" {
return nil, fmt.Errorf(incompatFieldMsgFmt, "condition")
}
}
if record == nil {
if condition == "" {
return nil, fmt.Errorf(`"condition" is required`)
}
// Compute defaults here to avoid issues related to the above, setting a default in the schema will cause these
// to be set before we can validate the configuration.
if noDataState == "" {
noDataState = "NoData"
}
if errState == "" {
errState = "Alerting"
}
}

rule := models.ProvisionedAlertRule{
UID: json["uid"].(string),
Title: common.Ref(json["name"].(string)),
FolderUID: common.Ref(folderUID),
RuleGroup: common.Ref(groupName),
OrgID: common.Ref(orgID),
ExecErrState: common.Ref(json["exec_err_state"].(string)),
NoDataState: common.Ref(json["no_data_state"].(string)),
ExecErrState: common.Ref(errState),
NoDataState: common.Ref(noDataState),
For: common.Ref(strfmt.Duration(forDuration)),
Data: data,
Condition: common.Ref(json["condition"].(string)),
Condition: common.Ref(condition),
Labels: unpackMap(json["labels"]),
Annotations: unpackMap(json["annotations"]),
IsPaused: json["is_paused"].(bool),
Expand Down
73 changes: 66 additions & 7 deletions internal/resources/grafana/resource_alerting_rule_group_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ func TestAccAlertRule_NotificationSettings(t *testing.T) {
}

func TestAccRecordingRule(t *testing.T) {
testutils.CheckCloudInstanceTestsEnabled(t) // TODO: change to 11.3.1 when available
testutils.CheckOSSTestsEnabled(t, ">=11.4.0")

var group models.AlertRuleGroup
var name = acctest.RandString(10)
Expand All @@ -696,17 +696,34 @@ func TestAccRecordingRule(t *testing.T) {
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.data.0.model", "{\"refId\":\"A\"}"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.record.0.metric", metric),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.record.0.from", "A"),
// ensure fields are cleared as expected
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.for", "2m0s"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.condition", "A"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.no_data_state", "NoData"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.exec_err_state", "Alerting"),
// ensure fields are empty as expected
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.for", "0s"),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.condition", ""),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.no_data_state", ""),
resource.TestCheckResourceAttr("grafana_rule_group.my_rule_group", "rule.0.exec_err_state", ""),
),
},
},
})
}

func TestAccRecordingRule_incompatibleSettings(t *testing.T) {
testutils.CheckOSSTestsEnabled(t, ">=11.4.0")

var name = acctest.RandString(10)
var metric = "valid_metric"

resource.ParallelTest(t, resource.TestCase{
ProtoV5ProviderFactories: testutils.ProtoV5ProviderFactories,
Steps: []resource.TestStep{
{
Config: testAccRecordingRuleInvalid(name, metric, "A"),
ExpectError: regexp.MustCompile(`conflicting fields "record" and "for"`),
},
},
})
}

func testAccAlertRuleGroupInOrgConfig(name string, interval int, disableProvenance bool) string {
return fmt.Sprintf(`
resource "grafana_organization" "test" {
Expand Down Expand Up @@ -877,7 +894,49 @@ resource "grafana_rule_group" "my_rule_group" {

rule {
name = "My Random Walk Alert"
// following should be cleared by Grafana

// Query the datasource.
data {
ref_id = "A"
relative_time_range {
from = 600
to = 0
}
datasource_uid = grafana_data_source.testdata_datasource.uid
model = jsonencode({
intervalMs = 1000
maxDataPoints = 43200
refId = "A"
})
}
record {
metric = "%[2]s"
from = "%[3]s"
}
}
}`, name, metric, refID)
}

func testAccRecordingRuleInvalid(name string, metric string, refID string) string {
return fmt.Sprintf(`
resource "grafana_folder" "rule_folder" {
title = "%[1]s"
}

resource "grafana_data_source" "testdata_datasource" {
name = "%[1]s"
type = "grafana-testdata-datasource"
url = "http://localhost:3333"
}

resource "grafana_rule_group" "my_rule_group" {
name = "%[1]s"
folder_uid = grafana_folder.rule_folder.uid
interval_seconds = 60

rule {
name = "My Random Walk Alert"
// following should be rejected
condition = "A"
no_data_state = "NoData"
exec_err_state = "Alerting"
Expand Down