From 0da4d7999d831ca26983df2e6d2cad68f0e5cd84 Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Fri, 20 Dec 2024 11:59:36 -0500 Subject: [PATCH 1/2] feat: create direct paging integrations with two default routes (+ migrate existing ones) --- .../integrations/references/manual/index.md | 38 ++++++++- ...upsert_direct_paging_integration_routes.py | 84 +++++++++++++++++++ .../alerts/models/alert_receive_channel.py | 51 ++++++++--- .../tests/test_alert_receiver_channel.py | 38 +++++++-- 4 files changed, 187 insertions(+), 24 deletions(-) create mode 100644 engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py diff --git a/docs/sources/configure/integrations/references/manual/index.md b/docs/sources/configure/integrations/references/manual/index.md index cad437dcfe..bcf7fcd8ea 100644 --- a/docs/sources/configure/integrations/references/manual/index.md +++ b/docs/sources/configure/integrations/references/manual/index.md @@ -89,9 +89,16 @@ to the team's ChatOps channels and start an appropriate escalation chain. ## Set up direct paging for a team -By default all teams will have a direct paging integration created for them. However, these are not configured by default. -If a team does not have their direct paging integration configured, such that it is "contactable" (ie. it has an -escalation chain assigned to it, or has at least one Chatops integration connected to send notifications to), you will +By default all teams will have a direct paging integration created for them. Each direct paging integration will be +created with two routes: + +- a non-default route which has a Jinja2 filtering term of `{{ payload.oncall.important }}` +(see [Important Escalations](#important-escalations) below for more details) +- a default route to capture all other alerts + +However, these integrations are not configured by default to be "contactable" (ie. 
their routes will have no +escalation chains assigned to them, nor any Chatops integrations connected to send notifications to). +If a team does not have their direct paging integration configured, such that it is "contactable", you will not be able to direct page this team. If this happens, consider following the following steps for the team (or reach out to the relevant team and suggest doing so). @@ -102,4 +109,27 @@ and select the same team for a test run. ### Important escalations -TODO: +Sometimes you really need to get the attention of a particular team. When directly paging a team, it is possible to +page them using an "important escalation". Practically speaking, this will create an alert, using the specified team's +direct paging integration as such: + +```json +{ + "oncall": { + "title": "IRM is paging Network team to join escalation", + "message": "I really need someone from your team to come take a look! The k8s cluster is down!", + "uid": "8a20b8d1-56fd-482e-824e-43fbd1bd7b10", + "author_username": "irm", + "permalink": null, + "important": true + } +} +``` + +When you are directly paging a team, either via the web UI, chatops apps, or the API, you can specify that this +escalation be "important", which will effectively set the value of `oncall.important` to `true`. As mentioned above in +[Set up direct paging for a team](#set-up-direct-paging-for-a-team), direct paging integrations come pre-configured with +two routes, with the non-default route having a Jinja2 filtering term of `{{ payload.oncall.important }}`. + +This allows teams to be contacted via different escalation chains, depending on whether or not the user paging them +believes that this is an "important escalation". 
diff --git a/engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py b/engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py
new file mode 100644
index 0000000000..7345b3c6a6
--- /dev/null
+++ b/engine/apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py
@@ -0,0 +1,99 @@
+# Generated by Django 4.2.17 on 2024-12-20 14:19
+
+import logging
+
+from django.db import migrations
+from django.db.models import Count
+
+logger = logging.getLogger(__name__)
+
+
+def upsert_direct_paging_integration_routes(apps, schema_editor):
+    """
+    Give every unedited direct paging integration the two-route layout.
+
+    An integration is considered unedited when it has exactly one route. For each of these, the existing
+    default route (order=0) is moved to order=1, and a new non-default route (order=0) with a Jinja2
+    filtering term of `{{ payload.oncall.important }}` is created.
+
+    `schema_editor` is part of the `RunPython` callable signature and is not used here.
+    """
+    AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel")
+    ChannelFilter = apps.get_model("alerts", "ChannelFilter")
+
+    DIRECT_PAGING_INTEGRATION_TYPE = "direct_paging"
+    IMPORTANT_FILTERING_TERM = "{{ payload.oncall.important }}"
+    # upper bound on the number of ids passed to a single `__in` lookup, to keep the number of query
+    # parameters reasonable
+    ID_LOOKUP_BATCH_SIZE = 500
+
+    # Fetch all direct paging integrations
+    logger.info("Fetching direct paging integrations which have not had their routes updated.")
+
+    # Ignore updating Direct Paging integrations that have > 1 route, as this means that users have
+    # gone ahead and created their own routes. We don't want to overwrite these.
+    #
+    # The matching ids are materialized into a list once, instead of re-using a lazy queryset, so that:
+    # - the update and the bulk-create below are guaranteed to operate on the same set of integrations
+    # - the update below does not embed a subquery which selects from the table that is being updated
+    #   (some databases, notably MySQL, may reject such statements)
+    unedited_direct_paging_integration_ids = list(
+        AlertReceiveChannel.objects
+        .filter(integration=DIRECT_PAGING_INTEGRATION_TYPE)
+        .annotate(num_routes=Count("channel_filters"))
+        .filter(num_routes=1)
+        .values_list("pk", flat=True)
+    )
+
+    integration_count = len(unedited_direct_paging_integration_ids)
+    if integration_count == 0:
+        logger.info("No integrations found which meet this criteria. No routes will be upserted.")
+        return
+
+    logger.info(f"Found {integration_count} direct paging integrations that meet this criteria.")
+
+    # Direct Paging Integrations are currently created with a single default route (order=0)
+    # see AlertReceiveChannelManager.create_missing_direct_paging_integrations
+    #
+    # we first need to update this route to be order=1, and then we will subsequently bulk-create the
+    # non-default route (order=0) which will have a filtering term set
+    updated_rows = 0
+    for batch_start in range(0, integration_count, ID_LOOKUP_BATCH_SIZE):
+        batch_ids = unedited_direct_paging_integration_ids[batch_start : batch_start + ID_LOOKUP_BATCH_SIZE]
+        updated_rows += ChannelFilter.objects.filter(
+            alert_receive_channel_id__in=batch_ids,
+            is_default=True,
+            order=0,
+        ).update(order=1)
+
+    logger.info(f"Swapped order=0 to order=1 for {updated_rows} Direct Paging Integrations default routes")
+
+    # Bulk create the new non-default routes
+    logger.info(f"Creating new non-default routes for {integration_count} Direct Paging Integrations")
+    created_objs = ChannelFilter.objects.bulk_create(
+        [
+            ChannelFilter(
+                alert_receive_channel_id=integration_id,
+                filtering_term=IMPORTANT_FILTERING_TERM,
+                filtering_term_type=1,  # 1 = ChannelFilter.FILTERING_TERM_TYPE_JINJA2
+                is_default=False,
+                order=0,
+            )
+            for integration_id in unedited_direct_paging_integration_ids
+        ],
+        batch_size=5000,
+    )
+    logger.info(f"Created {len(created_objs)} new non-default routes for Direct Paging Integrations")
+
+    logger.info("Migration for direct paging integration routes completed.")
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("alerts", "0071_migrate_labels"),
+    ]
+
+    operations = [
+        migrations.RunPython(upsert_direct_paging_integration_routes, migrations.RunPython.noop),
+    ]
diff --git a/engine/apps/alerts/models/alert_receive_channel.py b/engine/apps/alerts/models/alert_receive_channel.py
index 74fc5d237a..a8089337d4 100644
--- 
a/engine/apps/alerts/models/alert_receive_channel.py +++ b/engine/apps/alerts/models/alert_receive_channel.py @@ -126,6 +126,8 @@ class AlertReceiveChannelManager(models.Manager): def create_missing_direct_paging_integrations(organization: "Organization") -> None: from apps.alerts.models import ChannelFilter + logger.info(f"Starting create_missing_direct_paging_integrations for organization: {organization.id}") + # fetch teams without direct paging integration teams_missing_direct_paging = list( organization.teams.exclude( @@ -134,10 +136,17 @@ def create_missing_direct_paging_integrations(organization: "Organization") -> N ).values_list("team_id", flat=True) ) ) + number_of_teams_missing_direct_paging = len(teams_missing_direct_paging) + logger.info( + f"Found {number_of_teams_missing_direct_paging} teams missing direct paging integrations.", + ) + if not teams_missing_direct_paging: + logger.info("No missing direct paging integrations found. Exiting.") return # create missing integrations + logger.info(f"Creating missing direct paging integrations for {number_of_teams_missing_direct_paging} teams.") AlertReceiveChannel.objects.bulk_create( [ AlertReceiveChannel( @@ -151,29 +160,49 @@ def create_missing_direct_paging_integrations(organization: "Organization") -> N batch_size=5000, ignore_conflicts=True, # ignore if direct paging integration already exists for team ) + logger.info("Missing direct paging integrations creation step completed.") # fetch integrations for teams (some of them are created above, but some may already exist previously) alert_receive_channels = organization.alert_receive_channels.filter( team__in=teams_missing_direct_paging, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING ) + logger.info(f"Fetched {alert_receive_channels.count()} direct paging integrations for the specified teams.") + + # we create two routes for each Direct Paging Integration + # 1. 
route for important alerts (using the payload.oncall.important alert field value) - non-default + # 2. route for all other alerts - default + routes_to_create = [] + for alert_receive_channel in alert_receive_channels: + routes_to_create.extend( + [ + ChannelFilter( + alert_receive_channel=alert_receive_channel, + filtering_term="{{ payload.oncall.important }}", + filtering_term_type=ChannelFilter.FILTERING_TERM_TYPE_JINJA2, + is_default=False, + order=0, + ), + ChannelFilter( + alert_receive_channel=alert_receive_channel, + filtering_term=None, + is_default=True, + order=1, + ), + ] + ) - # create default routes + logger.info(f"Creating {len(routes_to_create)} channel filter routes.") ChannelFilter.objects.bulk_create( - [ - ChannelFilter( - alert_receive_channel=alert_receive_channel, - filtering_term=None, - is_default=True, - order=0, - ) - for alert_receive_channel in alert_receive_channels - ], + routes_to_create, batch_size=5000, - ignore_conflicts=True, # ignore if default route already exists for integration + ignore_conflicts=True, # ignore if routes already exist for integration ) + logger.info("Direct paging routes creation completed.") # add integrations to metrics cache + logger.info("Adding integrations to metrics cache.") metrics_add_integrations_to_cache(list(alert_receive_channels), organization) + logger.info("Integrations have been added to the metrics cache.") def get_queryset(self): return AlertReceiveChannelQueryset(self.model, using=self._db).filter( diff --git a/engine/apps/alerts/tests/test_alert_receiver_channel.py b/engine/apps/alerts/tests/test_alert_receiver_channel.py index 930239826d..d1f6dc3948 100644 --- a/engine/apps/alerts/tests/test_alert_receiver_channel.py +++ b/engine/apps/alerts/tests/test_alert_receiver_channel.py @@ -259,27 +259,47 @@ def test_create_missing_direct_paging_integrations( ): organization = make_organization() - # team with no direct paging integration + # two teams with no direct paging integration team1 = 
make_team(organization) + team2 = make_team(organization) # team with direct paging integration - team2 = make_team(organization) + team3 = make_team(organization) alert_receive_channel = make_alert_receive_channel( - organization, team=team2, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING + organization, team=team3, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING ) make_channel_filter(alert_receive_channel, is_default=True, order=0) # create missing direct paging integration for organization AlertReceiveChannel.objects.create_missing_direct_paging_integrations(organization) + assert organization.alert_receive_channels.count() == 3 + # check that missing integrations and default routes were created - assert organization.alert_receive_channels.count() == 2 - mock_metrics_add_integrations_to_cache.assert_called_once() + # + # NOTE: we explicitly don't test team3, it already has a Direct Paging integration associated with it + # and AlertReceiveChannel.objects.create_missing_direct_paging_integrations is not responsible for filling + # in missing routes. + # + # See apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py which is a data migration that does + # exactly this. 
for team in [team1, team2]: - alert_receive_channel = organization.alert_receive_channels.get( - team=team, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING - ) - assert alert_receive_channel.channel_filters.get().is_default + alert_receive_channel = organization.alert_receive_channels.get(team=team) + + direct_paging_integration_routes = alert_receive_channel.channel_filters.all() + + assert direct_paging_integration_routes.count() == 2 + + for route in direct_paging_integration_routes: + if route.is_default: + assert route.order == 1 + assert route.filtering_term is None + else: + assert route.order == 0 + assert route.filtering_term == "{{ payload.oncall.important }}" + assert route.filtering_term_type == route.FILTERING_TERM_TYPE_JINJA2 + + mock_metrics_add_integrations_to_cache.assert_called_once() @pytest.mark.django_db From d624c7f7e54b651a7aa936ad1430d1cbe26dedef Mon Sep 17 00:00:00 2001 From: Joey Orlando Date: Fri, 20 Dec 2024 12:15:33 -0500 Subject: [PATCH 2/2] update `test_sync_teams_for_organization` test --- .../apps/user_management/tests/test_sync.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/engine/apps/user_management/tests/test_sync.py b/engine/apps/user_management/tests/test_sync.py index e381c62a47..3f5bdd9a5c 100644 --- a/engine/apps/user_management/tests/test_sync.py +++ b/engine/apps/user_management/tests/test_sync.py @@ -203,23 +203,33 @@ def test_sync_teams_for_organization(make_organization, make_team, make_alert_re assert created_team.team_id == api_teams[2]["id"] assert created_team.name == api_teams[2]["name"] + def _assert_teams_direct_paging_integration_is_configured_properly(integration): + assert integration.channel_filters.count() == 2 + + for route in integration.channel_filters.all(): + if route.is_default: + assert route.order == 1 + assert route.filtering_term is None + else: + assert route.order == 0 + assert route.filtering_term == "{{ payload.oncall.important }}" + assert 
route.filtering_term_type == route.FILTERING_TERM_TYPE_JINJA2 + # check that direct paging is created for created team direct_paging_integration = AlertReceiveChannel.objects.get( organization=organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, team=created_team, ) - assert direct_paging_integration.channel_filters.count() == 1 - assert direct_paging_integration.channel_filters.first().order == 0 - assert direct_paging_integration.channel_filters.first().is_default + _assert_teams_direct_paging_integration_is_configured_properly(direct_paging_integration) # check that direct paging is created for existing team direct_paging_integration = AlertReceiveChannel.objects.get( - organization=organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, team=teams[2] + organization=organization, + integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, + team=teams[2], ) - assert direct_paging_integration.channel_filters.count() == 1 - assert direct_paging_integration.channel_filters.first().order == 0 - assert direct_paging_integration.channel_filters.first().is_default + _assert_teams_direct_paging_integration_is_configured_properly(direct_paging_integration) @pytest.mark.django_db