Merge pull request #5180 from grafana/dev

v1.11.1
grafana · Oct 15, 2024 · 4680512 · 4680512
2 parents fc60847 + 4667960
commit 4680512
Show file tree

Hide file tree

Showing 33 changed files with 217 additions and 68 deletions.
diff --git a/docs/sources/manage/notify/slack/index.md b/docs/sources/manage/notify/slack/index.md
@@ -108,12 +108,6 @@ This set of permissions is supporting the ability of Grafana OnCall to match use
 - **View user groups in your workspace**
 - **View profile details about people in your workspace**
 
-### Perform actions as you
-
-- **Send messages on your behalf** — this permission may sound suspicious, but it's actually a general ability
-  to send messages as the bot: <https://api.slack.com/scopes/chat:write> Grafana OnCall will not impersonate or post
-  using your handle to slack. It will always post as the bot.
-
 ### Perform actions in channels & conversations
 
 - **View messages that directly mention @grafana_oncall in conversations that the app is in**

diff --git a/docs/sources/set-up/open-source/index.md b/docs/sources/set-up/open-source/index.md
@@ -122,7 +122,6 @@ oauth_config:
   scopes:
     user:
       - channels:read
-      - chat:write
       - identify
       - users.profile:read
     bot:

diff --git a/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py b/engine/apps/alerts/incident_appearance/renderers/slack_renderer.py
@@ -202,9 +202,6 @@ def _make_button(text, action_id_step_class_name, action_id_scenario_step="distr
         unsilence_button = _make_button("Unsilence", "UnSilenceGroupStep")
         responders_button = _make_button("Responders", "StartManageResponders", "manage_responders")
         attach_button = _make_button("Attach to ...", "SelectAttachGroupStep")
-        format_alert_button = _make_button(
-            ":mag: Format Alert", "OpenAlertAppearanceDialogStep", "alertgroup_appearance"
-        )
 
         resolution_notes_count = alert_group.resolution_notes.count()
         resolution_notes_button = {
@@ -275,9 +272,6 @@ def _make_button(text, action_id_step_class_name, action_id_scenario_step="distr
             else:
                 buttons.append(unresolve_button)
 
-            if integration.is_available_for_custom_templates:
-                buttons.append(format_alert_button)
-
             buttons.append(resolution_notes_button)
 
             if grafana_incident_enabled and not alert_group.acknowledged:

diff --git a/engine/apps/alerts/tasks/notify_user.py b/engine/apps/alerts/tasks/notify_user.py
@@ -26,6 +26,9 @@
     from apps.user_management.models import User
 
 
+RETRY_TIMEOUT_HOURS = 1
+
+
 def schedule_send_bundled_notification_task(
     user_notification_bundle: "UserNotificationBundle", alert_group: "AlertGroup"
 ):
@@ -445,10 +448,29 @@ def perform_notification(log_record_pk, use_default_notification_policy_fallback
         try:
             TelegramToUserConnector.notify_user(user, alert_group, notification_policy)
         except RetryAfter as e:
-            countdown = getattr(e, "retry_after", 3)
-            raise perform_notification.retry(
-                (log_record_pk, use_default_notification_policy_fallback), countdown=countdown, exc=e
-            )
+            task_logger.exception(f"Telegram API rate limit exceeded. Retry after {e.retry_after} seconds.")
+            # check how much time has passed since log record was created
+            # to prevent eternal loop of restarting perform_notification task
+            if timezone.now() < log_record.created_at + timezone.timedelta(hours=RETRY_TIMEOUT_HOURS):
+                countdown = getattr(e, "retry_after", 3)
+                perform_notification.apply_async(
+                    (log_record_pk, use_default_notification_policy_fallback), countdown=countdown
+                )
+            else:
+                task_logger.debug(
+                    f"telegram notification for alert_group {alert_group.pk} failed because of rate limit"
+                )
+                UserNotificationPolicyLogRecord(
+                    author=user,
+                    type=UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED,
+                    notification_policy=notification_policy,
+                    reason="Telegram rate limit exceeded",
+                    alert_group=alert_group,
+                    notification_step=notification_policy.step,
+                    notification_channel=notification_channel,
+                    notification_error_code=UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT,
+                ).save()
+            return
 
     elif notification_channel == UserNotificationPolicy.NotificationChannel.SLACK:
         # TODO: refactor checking the possibility of sending a notification in slack
@@ -516,13 +538,12 @@ def perform_notification(log_record_pk, use_default_notification_policy_fallback
                 ).save()
                 return
 
-            retry_timeout_hours = 1
             if alert_group.slack_message:
                 alert_group.slack_message.send_slack_notification(user, alert_group, notification_policy)
                 task_logger.debug(f"Finished send_slack_notification for alert_group {alert_group.pk}.")
             # check how much time has passed since log record was created
             # to prevent eternal loop of restarting perform_notification task
-            elif timezone.now() < log_record.created_at + timezone.timedelta(hours=retry_timeout_hours):
+            elif timezone.now() < log_record.created_at + timezone.timedelta(hours=RETRY_TIMEOUT_HOURS):
                 task_logger.debug(
                     f"send_slack_notification for alert_group {alert_group.pk} failed because slack message "
                     f"does not exist. Restarting perform_notification."
@@ -534,7 +555,7 @@ def perform_notification(log_record_pk, use_default_notification_policy_fallback
             else:
                 task_logger.debug(
                     f"send_slack_notification for alert_group {alert_group.pk} failed because slack message "
-                    f"after {retry_timeout_hours} hours still does not exist"
+                    f"after {RETRY_TIMEOUT_HOURS} hours still does not exist"
                 )
                 UserNotificationPolicyLogRecord(
                     author=user,

diff --git a/engine/apps/alerts/tests/test_notify_user.py b/engine/apps/alerts/tests/test_notify_user.py
@@ -360,12 +360,30 @@ def test_perform_notification_telegram_retryafter_error(
     countdown = 15
     exc = RetryAfter(countdown)
     with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user:
-        with pytest.raises(RetryAfter):
+        with patch.object(perform_notification, "apply_async") as mock_apply_async:
             perform_notification(log_record.pk, False)
 
     mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy)
+    # task is rescheduled using the countdown value from the exception
+    mock_apply_async.assert_called_once_with((log_record.pk, False), countdown=countdown)
     assert alert_group.personal_log_records.last() == log_record
 
+    # but if the log was too old, skip and create a failed log record
+    log_record.created_at = timezone.now() - timezone.timedelta(minutes=90)
+    log_record.save()
+    with patch.object(TelegramToUserConnector, "notify_user", side_effect=exc) as mock_notify_user:
+        with patch.object(perform_notification, "apply_async") as mock_apply_async:
+            perform_notification(log_record.pk, False)
+    mock_notify_user.assert_called_once_with(user, alert_group, user_notification_policy)
+    assert not mock_apply_async.called
+    last_log_record = UserNotificationPolicyLogRecord.objects.last()
+    assert last_log_record.type == UserNotificationPolicyLogRecord.TYPE_PERSONAL_NOTIFICATION_FAILED
+    assert last_log_record.reason == "Telegram rate limit exceeded"
+    assert (
+        last_log_record.notification_error_code
+        == UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT
+    )
+
 
 @patch("apps.base.models.UserNotificationPolicy.get_default_fallback_policy")
 @patch("apps.base.tests.messaging_backend.TestOnlyBackend.notify_user")

diff --git a/engine/apps/base/models/user_notification_policy_log_record.py b/engine/apps/base/models/user_notification_policy_log_record.py
@@ -106,7 +106,8 @@ class UserNotificationPolicyLogRecord(models.Model):
         ERROR_NOTIFICATION_TELEGRAM_USER_IS_DEACTIVATED,
         ERROR_NOTIFICATION_MOBILE_USER_HAS_NO_ACTIVE_DEVICE,
         ERROR_NOTIFICATION_FORMATTING_ERROR,
-    ) = range(29)
+        ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT,
+    ) = range(30)
 
     # for these errors we want to send a message to the general log channel
     ERRORS_TO_SEND_IN_SLACK_CHANNEL = [
@@ -304,6 +305,10 @@ def render_log_line_action(self, for_slack=False, substitute_author_with_tag=Fal
                 result += f"failed to notify {user_verbal} in Slack, because channel is archived"
             elif self.notification_error_code == UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_SLACK_RATELIMIT:
                 result += f"failed to notify {user_verbal} in Slack due to Slack rate limit"
+            elif (
+                self.notification_error_code == UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_IN_TELEGRAM_RATELIMIT
+            ):
+                result += f"failed to notify {user_verbal} in Telegram due to Telegram rate limit"
             elif self.notification_error_code == UserNotificationPolicyLogRecord.ERROR_NOTIFICATION_FORBIDDEN:
                 result += f"failed to notify {user_verbal}, not allowed"
             elif (

diff --git a/engine/apps/grafana_plugin/helpers/client.py b/engine/apps/grafana_plugin/helpers/client.py
@@ -11,6 +11,9 @@
 from apps.api.permissions import GrafanaAPIPermission, GrafanaAPIPermissions
 from common.constants.plugin_ids import PluginID
 
+if typing.TYPE_CHECKING:
+    from apps.user_management.models import Organization
+
 logger = logging.getLogger(__name__)
 
 
@@ -309,6 +312,9 @@ def get_grafana_incident_plugin_settings(self) -> APIClientResponse["GrafanaAPIC
     def get_grafana_labels_plugin_settings(self) -> APIClientResponse["GrafanaAPIClient.Types.PluginSettings"]:
         return self.get_grafana_plugin_settings(PluginID.LABELS)
 
+    def get_grafana_irm_plugin_settings(self) -> APIClientResponse["GrafanaAPIClient.Types.PluginSettings"]:
+        return self.get_grafana_plugin_settings(PluginID.IRM)
+
     def get_service_account(self, login: str) -> APIClientResponse["GrafanaAPIClient.Types.ServiceAccountResponse"]:
         return self.api_get(f"api/serviceaccounts/search?query={login}")
 
@@ -328,8 +334,8 @@ def create_service_account_token(
     def get_service_account_token_permissions(self) -> APIClientResponse[typing.Dict[str, typing.List[str]]]:
         return self.api_get("api/access-control/user/permissions")
 
-    def sync(self) -> APIClientResponse:
-        return self.api_post("api/plugins/grafana-oncall-app/resources/plugin/sync")
+    def sync(self, organization: "Organization") -> APIClientResponse:
+        return self.api_post(f"api/plugins/{organization.active_ui_plugin_id}/resources/plugin/sync")
 
     @staticmethod
     def validate_grafana_token_format(grafana_token: str) -> bool:

diff --git a/engine/apps/grafana_plugin/serializers/sync_data.py b/engine/apps/grafana_plugin/serializers/sync_data.py
@@ -71,6 +71,7 @@ class SyncOnCallSettingsSerializer(serializers.Serializer):
     incident_enabled = serializers.BooleanField()
     incident_backend_url = serializers.CharField(allow_blank=True)
     labels_enabled = serializers.BooleanField()
+    irm_enabled = serializers.BooleanField(default=False)
 
     def create(self, validated_data):
         return SyncSettings(**validated_data)

diff --git a/engine/apps/grafana_plugin/sync_data.py b/engine/apps/grafana_plugin/sync_data.py
@@ -40,6 +40,7 @@ class SyncSettings:
     incident_enabled: bool
     incident_backend_url: str
     labels_enabled: bool
+    irm_enabled: bool
 
 
 @dataclass

diff --git a/engine/apps/grafana_plugin/tasks/sync_v2.py b/engine/apps/grafana_plugin/tasks/sync_v2.py
@@ -49,7 +49,7 @@ def sync_organizations_v2(org_ids=None):
     organization_qs = Organization.objects.filter(id__in=org_ids)
     for org in organization_qs:
         client = GrafanaAPIClient(api_url=org.grafana_url, api_token=org.api_token)
-        _, status = client.sync()
+        _, status = client.sync(org)
         if status["status_code"] != 200:
             logger.error(
                 f"Failed to request sync org_id={org.pk} stack_slug={org.stack_slug} status_code={status['status_code']} url={status['url']} message={status['message']}"

diff --git a/engine/apps/grafana_plugin/tests/test_sync_v2.py b/engine/apps/grafana_plugin/tests/test_sync_v2.py
@@ -12,7 +12,8 @@
 from apps.api.permissions import LegacyAccessControlRole
 from apps.grafana_plugin.serializers.sync_data import SyncTeamSerializer
 from apps.grafana_plugin.sync_data import SyncData, SyncSettings, SyncUser
-from apps.grafana_plugin.tasks.sync_v2 import start_sync_organizations_v2
+from apps.grafana_plugin.tasks.sync_v2 import start_sync_organizations_v2, sync_organizations_v2
+from common.constants.plugin_ids import PluginID
 
 
 @pytest.mark.django_db
@@ -121,6 +122,7 @@ def test_sync_v2_content_encoding(
             incident_enabled=False,
             incident_backend_url="",
             labels_enabled=False,
+            irm_enabled=False,
         ),
     )
 
@@ -140,6 +142,57 @@ def test_sync_v2_content_encoding(
         mock_sync.assert_called()
 
 
+@pytest.mark.parametrize(
+    "irm_enabled,expected",
+    [
+        (True, True),
+        (False, False),
+    ],
+)
+@pytest.mark.django_db
+def test_sync_v2_irm_enabled(
+    make_organization_and_user_with_plugin_token,
+    make_user_auth_headers,
+    settings,
+    irm_enabled,
+    expected,
+):
+    settings.LICENSE = settings.CLOUD_LICENSE_NAME
+    organization, _, token = make_organization_and_user_with_plugin_token()
+
+    assert organization.is_grafana_irm_enabled is False
+
+    client = APIClient()
+    headers = make_user_auth_headers(None, token, organization=organization)
+    url = reverse("grafana-plugin:sync-v2")
+
+    data = SyncData(
+        users=[],
+        teams=[],
+        team_members={},
+        settings=SyncSettings(
+            stack_id=organization.stack_id,
+            org_id=organization.org_id,
+            license=settings.CLOUD_LICENSE_NAME,
+            oncall_api_url="http://localhost",
+            oncall_token="",
+            grafana_url="http://localhost",
+            grafana_token="fake_token",
+            rbac_enabled=False,
+            incident_enabled=False,
+            incident_backend_url="",
+            labels_enabled=False,
+            irm_enabled=irm_enabled,
+        ),
+    )
+
+    response = client.post(url, format="json", data=asdict(data), **headers)
+    assert response.status_code == status.HTTP_200_OK
+
+    organization.refresh_from_db()
+    assert organization.is_grafana_irm_enabled == expected
+
+
 @pytest.mark.parametrize(
     "test_team, validation_pass",
     [
@@ -190,3 +243,23 @@ def check_call(actual, expected):
             assert check_call(actual_call, expected_call)
 
         assert mock_sync.call_count == len(expected_calls)
+
+
+@patch(
+    "apps.grafana_plugin.tasks.sync_v2.GrafanaAPIClient.api_post",
+    return_value=(None, {"status_code": status.HTTP_200_OK}),
+)
+@pytest.mark.parametrize(
+    "is_grafana_irm_enabled,expected",
+    [
+        (True, PluginID.IRM),
+        (False, PluginID.ONCALL),
+    ],
+)
+@pytest.mark.django_db
+def test_sync_organizations_v2_calls_right_backend_plugin_sync_endpoint(
+    mocked_grafana_api_client_api_post, make_organization, is_grafana_irm_enabled, expected
+):
+    org = make_organization(is_grafana_irm_enabled=is_grafana_irm_enabled)
+    sync_organizations_v2(org_ids=[org.pk])
+    mocked_grafana_api_client_api_post.assert_called_once_with(f"api/plugins/{expected}/resources/plugin/sync")
diff --git a/engine/apps/public_api/serializers/escalation_chains.py b/engine/apps/public_api/serializers/escalation_chains.py
@@ -2,14 +2,17 @@
 
 from apps.alerts.models import EscalationChain
 from common.api_helpers.custom_fields import TeamPrimaryKeyRelatedField
+from common.api_helpers.mixins import EagerLoadingMixin
 from common.api_helpers.utils import CurrentOrganizationDefault
 
 
-class EscalationChainSerializer(serializers.ModelSerializer):
+class EscalationChainSerializer(EagerLoadingMixin, serializers.ModelSerializer):
     id = serializers.ReadOnlyField(source="public_primary_key")
     organization = serializers.HiddenField(default=CurrentOrganizationDefault())
     team_id = TeamPrimaryKeyRelatedField(required=False, allow_null=True, source="team")
 
+    SELECT_RELATED = ["organization", "team"]
+
     class Meta:
         model = EscalationChain
         fields = (

diff --git a/engine/apps/public_api/serializers/escalation_policies.py b/engine/apps/public_api/serializers/escalation_policies.py
@@ -107,7 +107,13 @@ class Meta:
         ]
 
     PREFETCH_RELATED = ["notify_to_users_queue"]
-    SELECT_RELATED = ["escalation_chain"]
+    SELECT_RELATED = [
+        "custom_webhook",
+        "escalation_chain",
+        "notify_schedule",
+        "notify_to_group",
+        "notify_to_team_members",
+    ]
 
     @cached_property
     def escalation_chain(self):

diff --git a/engine/apps/public_api/serializers/integrations.py b/engine/apps/public_api/serializers/integrations.py
@@ -85,7 +85,7 @@ class IntegrationSerializer(EagerLoadingMixin, serializers.ModelSerializer, Main
     description_short = serializers.CharField(max_length=250, required=False, allow_null=True)
 
     PREFETCH_RELATED = ["channel_filters"]
-    SELECT_RELATED = ["organization", "integration_heartbeat"]
+    SELECT_RELATED = ["organization", "integration_heartbeat", "team"]
 
     class Meta:
         model = AlertReceiveChannel

diff --git a/engine/apps/public_api/serializers/on_call_shifts.py b/engine/apps/public_api/serializers/on_call_shifts.py
@@ -122,7 +122,7 @@ class Meta:
             "source": {"required": False, "write_only": True},
         }
 
-    SELECT_RELATED = ["schedule"]
+    SELECT_RELATED = ["organization", "team", "schedule"]
     PREFETCH_RELATED = ["schedules", "users"]
 
     def create(self, validated_data):

diff --git a/engine/apps/public_api/serializers/routes.py b/engine/apps/public_api/serializers/routes.py
@@ -4,6 +4,7 @@
 from apps.base.messaging import get_messaging_backend_from_id, get_messaging_backends
 from common.api_helpers.custom_fields import OrganizationFilteredPrimaryKeyRelatedField
 from common.api_helpers.exceptions import BadRequest
+from common.api_helpers.mixins import EagerLoadingMixin
 from common.api_helpers.utils import valid_jinja_template_for_serializer_method_field
 from common.jinja_templater.apply_jinja_template import JinjaTemplateError
 from common.ordered_model.serializer import OrderedModelSerializer
@@ -129,7 +130,7 @@ def to_internal_value(self, data):
         raise BadRequest(detail="Invalid route type")
 
 
-class ChannelFilterSerializer(BaseChannelFilterSerializer):
+class ChannelFilterSerializer(EagerLoadingMixin, BaseChannelFilterSerializer):
     id = serializers.CharField(read_only=True, source="public_primary_key")
     slack = serializers.DictField(required=False)
     telegram = serializers.DictField(required=False)
@@ -146,6 +147,8 @@ class ChannelFilterSerializer(BaseChannelFilterSerializer):
 
     is_the_last_route = serializers.BooleanField(read_only=True, source="is_default")
 
+    SELECT_RELATED = ["alert_receive_channel", "escalation_chain"]
+
     class Meta:
         model = ChannelFilter
         fields = OrderedModelSerializer.Meta.fields + [