Skip to content

Commit

Permalink
alert when instance count is low (#1472)
Browse files Browse the repository at this point in the history
* alert when instance count is low
* Fix non-pr env logic

---------

Co-authored-by: Samuel Aquino <[email protected]>
Co-authored-by: halprin <[email protected]>
Co-authored-by: James Gilmore <[email protected]>
Co-authored-by: jherrflexion <[email protected]>
  • Loading branch information
5 people authored Oct 23, 2024
1 parent 6ba8ac1 commit eca4729
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
41 changes: 41 additions & 0 deletions operations/template/alert.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,47 @@ resource "azurerm_monitor_metric_alert" "azure_4XX_alert" {
]
}
}
# Metric alert on the API autoscale setting's observed instance count.
# Only created for non-PR environments (count is 0 otherwise).
resource "azurerm_monitor_metric_alert" "low_instance_count_alert" {
  count = local.non_pr_environment ? 1 : 0

  name                = "cdcti-${var.environment}-azure-low-instance-count-alert"
  description         = "The instance count in ${var.environment} is too low"
  resource_group_name = data.azurerm_resource_group.group.name

  # The alert watches the autoscale setting itself, which emits ObservedCapacity.
  scopes = [azurerm_monitor_autoscale_setting.api_autoscale.id]

  severity    = 2       # warning
  frequency   = "PT1M"  # evaluate once per minute
  window_size = "PT15M" # each evaluation looks back over the previous 15 minutes

  # Fires when the average ObservedCapacity over the window is at or below the
  # first autoscale profile's default capacity minus 0.5.
  criteria {
    metric_namespace = "Microsoft.Insights/autoscalesettings"
    metric_name      = "ObservedCapacity"
    aggregation      = "Average"
    operator         = "LessThanOrEqual"
    threshold        = azurerm_monitor_autoscale_setting.api_autoscale.profile[0].capacity[0].default - 0.5
  }

  # Route notifications to the action group instance with the same index as this alert.
  action {
    action_group_id = azurerm_monitor_action_group.notify_slack_email[count.index].id
  }

  lifecycle {
    # The CDC sets these tags automatically, so changes to them are ignored
    # to avoid spurious diffs.
    ignore_changes = [
      tags["business_steward"],
      tags["center"],
      tags["environment"],
      tags["escid"],
      tags["funding_source"],
      tags["pii_data"],
      tags["security_compliance"],
      tags["security_steward"],
      tags["support_group"],
      tags["system"],
      tags["technical_steward"],
      tags["zone"]
    ]
  }
}

resource "azurerm_monitor_scheduled_query_rules_alert" "ti-log-errors-alert" {
count = local.non_pr_environment ? 1 : 0
name = "cdcti-${var.environment}-log-errors-alert"
Expand Down
4 changes: 3 additions & 1 deletion operations/template/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ locals {
rs_domain_prefix = "${local.selected_rs_environment_prefix}${length(local.selected_rs_environment_prefix) == 0 ? "" : "."}"
higher_environment_level = var.environment == "stg" || var.environment == "prd"
cdc_domain_environment = var.environment == "dev" || var.environment == "stg" || var.environment == "prd"
non_pr_environment = !strcontains(var.environment, "pr")

// If the environment looks like pr123, regexall will contain matches. If there are no matches, it's a non-pr env
non_pr_environment = length(regexall("^pr\\d+", var.environment)) == 0
}

data "azurerm_resource_group" "group" {
Expand Down

0 comments on commit eca4729

Please sign in to comment.