From d0eb92a4604e967fa91d42b58f0f3767188df980 Mon Sep 17 00:00:00 2001 From: Johannes Christ Date: Sun, 10 Dec 2023 15:39:11 +0100 Subject: [PATCH] Implement the github-filter worker in the API The current github-filter worker, found at https://github.com/python-discord/workers/blob/main/github-filter/src/index.ts, fails to work at present because Discord's webhook endpoints block Cloudflare's IP ranges from accessing this endpoint. Whilst they use Cloudflare to guard themselves, it seems they do not wish others to use it. Implement it on the site to circumvent IP restrictions and to allow the code to be modified in Python. --- .../api/tests/test_github_webhook_filter.py | 49 +++++++++ pydis_site/apps/api/urls.py | 12 ++- pydis_site/apps/api/views.py | 101 ++++++++++++++++++ 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 pydis_site/apps/api/tests/test_github_webhook_filter.py diff --git a/pydis_site/apps/api/tests/test_github_webhook_filter.py b/pydis_site/apps/api/tests/test_github_webhook_filter.py new file mode 100644 index 0000000000..c587a10b46 --- /dev/null +++ b/pydis_site/apps/api/tests/test_github_webhook_filter.py @@ -0,0 +1,49 @@ +from unittest import mock + +from django.urls import reverse +from rest_framework.test import APITestCase + + +class GitHubWebhookFilterAPITests(APITestCase): + def test_ignores_bot_senders(self): + url = reverse('api:github-webhook-filter', args=('id', 'token')) + senders = ('coveralls', 'lemon[bot]') + for sender in senders: + with self.subTest(sender=sender): + payload = {'sender': {'login': sender}} + headers = {'X-GitHub-Event': 'pull_request_review'} + response = self.client.post(url, data=payload, headers=headers) + self.assertEqual(response.status_code, 203) + + def test_accepts_interesting_events(self): + url = reverse('api:github-webhook-filter', args=('id', 'token')) + payload = { + 'ref': 'refs/heads/master', + 'pull_request': { + 'user': { + 'login': "lemon", + } + }, + 'review': { + 'state': 
'commented', + 'body': "Amazing!!!" + }, + 'repository': { + 'name': 'black', + 'owner': { + 'login': 'psf', + } + } + } + headers = {'X-GitHub-Event': 'pull_request_review'} + + with mock.patch('urllib.request.urlopen') as urlopen: + urlopen.return_value = mock.MagicMock() + context_mock = urlopen.return_value.__enter__.return_value + context_mock.status = 299 + context_mock.getheaders.return_value = [('X-Clacks-Overhead', 'Joe Armstrong')] + context_mock.read.return_value = b'{"status": "ok"}' + + response = self.client.post(url, data=payload, headers=headers) + self.assertEqual(response.status_code, context_mock.status) + self.assertEqual(response.headers.get('X-Clacks-Overhead'), 'Joe Armstrong') diff --git a/pydis_site/apps/api/urls.py b/pydis_site/apps/api/urls.py index f872ba9202..80d4edc294 100644 --- a/pydis_site/apps/api/urls.py +++ b/pydis_site/apps/api/urls.py @@ -1,7 +1,12 @@ from django.urls import include, path from rest_framework.routers import DefaultRouter -from .views import GitHubArtifactsView, HealthcheckView, RulesView +from .views import ( + GitHubArtifactsView, + GitHubWebhookFilterView, + HealthcheckView, + RulesView, +) from .viewsets import ( AocAccountLinkViewSet, AocCompletionistBlockViewSet, @@ -101,4 +106,9 @@ GitHubArtifactsView.as_view(), name="github-artifacts" ), + path( + 'github/webhook-filter/<str:webhook_id>/<str:webhook_token>', + GitHubWebhookFilterView.as_view(), + name='github-webhook-filter' + ), ) diff --git a/pydis_site/apps/api/views.py b/pydis_site/apps/api/views.py index 829086e767..8613852e46 100644 --- a/pydis_site/apps/api/views.py +++ b/pydis_site/apps/api/views.py @@ -1,3 +1,8 @@ +import json +import urllib.request +from collections.abc import Mapping + +from rest_framework import status from rest_framework.exceptions import ParseError from rest_framework.request import Request from rest_framework.response import Response @@ -226,3 +231,99 @@ def get( "error": str(e), "requested_resource": f"{owner}/{repo}/{sha}/{action_name}/{artifact_name}" }, 
status=e.status)
+
+
+class GitHubWebhookFilterView(APIView):
+    """
+    Filters uninteresting events from webhooks sent by GitHub to Discord.
+
+    ## Routes
+    ### POST /github/webhook-filter/:webhook_id/:webhook_token
+    Takes the GitHub webhook payload as the request body, documented here:
+    https://docs.github.com/en/webhooks/webhook-events-and-payloads. The endpoint
+    will then determine whether the sent webhook event is of interest,
+    and if so, will forward it to Discord. The response from Discord is
+    then returned back to the client of this website, including the original
+    status code and headers (excluding `Content-Type` and connection-level
+    headers such as `Content-Length`).
+
+    Events that are filtered out are answered with a 203 status code and are
+    not forwarded to Discord.
+
+    ## Authentication
+    Does not require any authentication nor permissions on its own, however,
+    Discord will validate that the webhook originates from GitHub and respond
+    with a 403 forbidden error if not.
+    """
+
+    authentication_classes = ()
+    permission_classes = ()
+
+    # Seconds to wait on Discord before giving up, so that a stalled upstream
+    # connection cannot tie up a worker indefinitely.
+    discord_timeout = 10
+
+    # Lower-cased names of headers that describe one connection or one message
+    # body. They are never copied between the inbound and outbound exchanges.
+    _CONNECTION_HEADERS = frozenset({
+        'connection',
+        'content-length',
+        'keep-alive',
+        'proxy-authenticate',
+        'proxy-authorization',
+        'te',
+        'trailer',
+        'trailers',
+        'transfer-encoding',
+        'upgrade',
+    })
+    # Inbound headers that must additionally be kept away from Discord: the body
+    # is re-serialised as JSON, and `Host` names this site rather than Discord.
+    _REQUEST_ONLY_HEADERS = frozenset({'content-type', 'host'})
+
+    def post(self, request: Request, *, webhook_id: str, webhook_token: str) -> Response:
+        """
+        Filter a webhook POST from GitHub before sending it to Discord.
+
+        Returns a 203 response without contacting Discord when the event is
+        filtered out; otherwise relays Discord's status code, headers and body.
+        """
+        event = request.headers.get('X-GitHub-Event')
+        if self._should_ignore(event, request.data):
+            return Response(
+                {'message': "Ignored by github-filter endpoint"},
+                status=status.HTTP_203_NON_AUTHORITATIVE_INFORMATION,
+            )
+
+        (response_status, headers, body) = self.send_webhook(
+            webhook_id, webhook_token, request.data, dict(request.headers),
+        )
+        # The body is rendered anew for the client, so connection-level headers
+        # taken from Discord's response no longer describe what is being sent.
+        relayed_headers = {
+            name: value
+            for name, value in headers.items()
+            if name.lower() not in self._CONNECTION_HEADERS
+        }
+        return Response(data=body, headers=relayed_headers, status=response_status)
+
+    @staticmethod
+    def _dig(payload: object, *path: str) -> object:
+        """Return the value found by following `path` through nested mappings, else `None`."""
+        current = payload
+        for key in path:
+            if not isinstance(current, Mapping):
+                return None
+            current = current.get(key)
+        return current
+
+    @classmethod
+    def _text(cls, payload: object, *path: str) -> str:
+        """Like `_dig`, but return an empty string unless the value found is a string."""
+        value = cls._dig(payload, *path)
+        return value if isinstance(value, str) else ''
+
+    @classmethod
+    def _should_ignore(cls, event: "str | None", payload: object) -> bool:
+        """
+        Return whether a GitHub event should be dropped instead of forwarded.
+
+        `event` is the value of the `X-GitHub-Event` header and `payload` the
+        parsed request body. Missing, `null` or wrongly-typed payload fields are
+        treated as absent rather than raising.
+        """
+        sender = cls._text(payload, 'sender', 'login')
+        is_review = event == 'pull_request_review'
+
+        is_coveralls = 'coveralls' in sender
+        is_github_bot = '[bot]' in sender
+        is_sentry = 'sentry-io' in sender
+        is_dependabot_branch_delete = (
+            event == 'delete'
+            and 'dependabot' in cls._text(payload, 'ref')
+        )
+        is_bot_pr_approve = (
+            is_review
+            and '[bot]' in cls._text(payload, 'pull_request', 'user', 'login')
+        )
+        is_empty_review = (
+            is_review
+            and cls._dig(payload, 'review', 'state') == 'commented'
+            and cls._dig(payload, 'review', 'body') is None
+        )
+        is_black_non_main_push = (
+            event == 'push'
+            and cls._dig(payload, 'ref') != 'refs/heads/main'
+            and cls._dig(payload, 'repository', 'name') == 'black'
+            and cls._dig(payload, 'repository', 'owner', 'login') == 'psf'
+        )
+
+        is_bot_payload = (
+            is_coveralls
+            or (is_github_bot and not is_sentry)
+            or is_dependabot_branch_delete
+            or is_bot_pr_approve
+        )
+        is_noisy_user_action = is_empty_review
+        return is_bot_payload or is_noisy_user_action or is_black_non_main_push
+
+    def send_webhook(
+        self,
+        webhook_id: str,
+        webhook_token: str,
+        data: dict,
+        headers: Mapping[str, str],
+    ) -> tuple[int, dict[str, str], bytes]:
+        """
+        Execute a webhook on Discord's GitHub webhook endpoint.
+
+        `headers` are the headers of the inbound request. They are forwarded to
+        Discord, minus those that only apply to the inbound request, and the
+        mapping itself is left unmodified.
+
+        Returns the status code, headers and raw body of Discord's response.
+        HTTP error responses are returned the same way rather than raised.
+        """
+        payload = json.dumps(data).encode()
+        skipped = self._CONNECTION_HEADERS | self._REQUEST_ONLY_HEADERS
+        forwarded_headers = {
+            name: value for name, value in headers.items() if name.lower() not in skipped
+        }
+        request = urllib.request.Request(  # noqa: S310
+            f'https://discord.com/api/webhooks/{webhook_id}/{webhook_token}/github?wait=1',
+            data=payload,
+            headers={**forwarded_headers, 'Content-Type': 'application/json'},
+        )
+
+        try:
+            with urllib.request.urlopen(  # noqa: S310
+                request, timeout=self.discord_timeout,
+            ) as response:
+                return (response.status, dict(response.getheaders()), response.read())
+        except urllib.error.HTTPError as err:  # pragma: no cover
+            return (err.code, dict(err.headers), err.fp.read())