From 67aed892b190b081f21146912f04480798d1e4ad Mon Sep 17 00:00:00 2001 From: Fabio Ambauen Date: Mon, 18 Sep 2023 11:15:21 +0200 Subject: [PATCH] feat(healthz): overhaul health-checks for them to be less intrusive This commit removes writing to the DB and MinIO as part of the health-checks. Additionally also DB-permissions, etc are not checked anymore. Those checks did not provide any real value, but resulted in unnecessary lead on the systems. --- caluma/caluma_core/conftest.py | 232 +------------- caluma/caluma_core/health_checks.py | 94 +----- .../__snapshots__/test_health_checks.ambr | 282 +----------------- .../caluma_core/tests/test_health_checks.py | 179 +---------- caluma/settings/django.py | 1 - 5 files changed, 26 insertions(+), 762 deletions(-) diff --git a/caluma/caluma_core/conftest.py b/caluma/caluma_core/conftest.py index 1da3d3aaa..d0f12e0b3 100644 --- a/caluma/caluma_core/conftest.py +++ b/caluma/caluma_core/conftest.py @@ -2,13 +2,10 @@ import sys import pytest -import requests from django.conf import settings from django.core import management from django.db import connection -from minio import Minio from psycopg2 import OperationalError -from requests.models import Response from watchman.decorators import check as watchman_check_decorator from caluma.caluma_form import storage_clients @@ -83,156 +80,12 @@ def db_broken_connection(transactional_db): connection.connect() -def switch_user(test=False): - """Re-connect to database with either user 'tester' or 'caluma'.""" - # set username and password - user = "tester" if test else "caluma" - password = "test" if test else "caluma" - - # connect new user to database - connection.close() - settings.DATABASES["default"]["USER"] = user - settings.DATABASES["default"]["PASSWORD"] = password - connection.connect() - - -@pytest.fixture -def db_user_tester(transactional_db): - """Create new user 'tester' with privileges necessary for health checks. - - User is removed after use. - """ - # get db connection - cursor = connection.cursor() - - # create test user (with all necessary privileges) - cursor.execute("CREATE USER tester WITH PASSWORD 'test';") - cursor.execute("GRANT ALL ON caluma_logging_accesslog TO tester;") - cursor.execute("GRANT ALL ON caluma_form_file TO tester;") - cursor.execute( - "GRANT USAGE, SELECT ON SEQUENCE caluma_logging_accesslog_id_seq TO tester;" - ) - cursor.execute("GRANT ALL ON django_migrations TO tester;") - - yield - - # get db connection - cursor = connection.cursor() - - # revoke privileges and remove user - cursor.execute("REVOKE ALL ON caluma_logging_accesslog FROM tester;") - cursor.execute("REVOKE ALL ON caluma_form_file FROM tester;") - cursor.execute( - "REVOKE USAGE, SELECT ON SEQUENCE caluma_logging_accesslog_id_seq FROM tester;" - ) - cursor.execute("REVOKE ALL ON django_migrations FROM tester;") - cursor.execute("DROP USER tester;") - - -@pytest.fixture -def db_privileges_working(db_user_tester): - """DB fixture with all necessary privileges for health checks. - - Create new test user with necessary privileges and connect to database. - Remove test user and reconnect with original database user after use. - """ - # switch to new test user - switch_user(test=True) - - yield - - # switch back to caluma user - switch_user() - - -@pytest.fixture -def db_read_error(db_user_tester): - """DB fixture with revoked read privileges on caluma_logging_accesslog. - - Create new test user without necessary read privileges for table used by - health check and connect to database. - Remove test user and reconnect with original database user after use. - """ - # revoke select rights on accesslog table - cursor = connection.cursor() - cursor.execute("REVOKE SELECT ON caluma_logging_accesslog FROM tester;") - - # switch to new test user - switch_user(test=True) - - yield - - # switch back to caluma user - switch_user() - - # grant select rights on accesslog table such that user can be removed - cursor = connection.cursor() - cursor.execute("GRANT SELECT ON caluma_logging_accesslog TO tester;") - - -@pytest.fixture -def db_write_error(db_user_tester): - """DB fixture with revoked write privileges on caluma_logging_accesslog. - - Create new test user without necessary write privileges for table used by - health check and connect to database. - Remove test user and reconnect with original database user after use. - """ - # revoke insert, update and delete privileges on accesslog table - cursor = connection.cursor() - cursor.execute( - "REVOKE INSERT,UPDATE,DELETE ON caluma_logging_accesslog FROM tester;" - ) - - # switch to new test user - switch_user(test=True) - - yield - - # switch back to caluma user - switch_user() - - # grant insert, update, delete rights on accesslog table - # such that user can be removed - cursor = connection.cursor() - cursor.execute("GRANT INSERT,UPDATE,DELETE ON caluma_logging_accesslog TO tester;") - - @pytest.fixture -def minio_mock_working(minio_mock, mocker): - """Provide working minio mock for health checks. - - Based on existing minio_mock. - Patches requests.get and requests.put to provide necessary responses for - health check. - """ - # determine fixed UUID to be used during tests - uuid = "7c5ad424-e351-4db8-ba37-bbace45d0e0f" - mocker.patch("uuid.uuid4", return_value=uuid) - - # mock upload response - upload_response = Response() - upload_response.status_code = 200 - mocker.patch("requests.put", return_value=upload_response) - - # mock download response - download_response = Response() - download_response.status_code = 200 - download_response._content = ( - b"--ffc537c8d0da305912d6440576ac0d75\r\n" - b'Content-Disposition: form-data; name="file"; ' - b'filename="healthz_tmp-file.txt"\r\n\r\n' - b"Test\nTest\n\r\n" - b"--ffc537c8d0da305912d6440576ac0d75--\r\n" +def minio_mock_working(mocker): + """Provide working minio mock for health checks.""" + return mocker.patch.object( + storage_clients.client.client, "bucket_exists", return_value=False ) - mocker.patch("requests.get", return_value=download_response) - - # change object name of existing stat response - stat_response = Minio.stat_object.return_value - stat_response._object_name = "healthz_tmp-object_" + uuid - - # update responses compatible with media storage service health check - Minio.stat_object.side_effect = [stat_response, None] @pytest.fixture @@ -276,83 +129,6 @@ def minio_not_configured(): storage_clients.client = prev_client -@pytest.fixture -def minio_mock_read_error(minio_mock, mocker): - """Produce exception when downloading file or stat-ing object. - - This scenario might occur when there is a internal minio issue or if a - previous upload isn't successful (which doesn't issue exception). - """ - - # raise exception instead of handling request - def download_side_effect(*args, **kwargs): # pragma: no cover - raise requests.exceptions.RequestException("GET request failed") - - def stat_side_effect(*args, **kwargs): - raise Exception("stat_object failed") - - # determine fixed UUID to be used during tests - uuid = "7c5ad424-e351-4db8-ba37-bbace45d0e0f" - mocker.patch("uuid.uuid4", return_value=uuid) - - # mock upload response - upload_response = Response() - upload_response.status_code = 200 - mocker.patch("requests.put", return_value=upload_response) - - # mock erroneous download response - download_response = Response() - download_response.status_code = 500 - mocker.patch( - "requests.get", return_value=download_response, side_effect=download_side_effect - ) - - # raise exception - Minio.stat_object.side_effect = stat_side_effect - - -@pytest.fixture -def minio_mock_write_error(minio_mock, mocker): - """Produce exception when uploading file. - - This scenario might occur when there is a internal minio issue. - """ - - # raise exception instead of handling request - def upload_side_effect(*args, **kwargs): - raise requests.exceptions.RequestException("PUT request failed") - - # determine fixed UUID to be used during tests - uuid = "7c5ad424-e351-4db8-ba37-bbace45d0e0f" - mocker.patch("uuid.uuid4", return_value=uuid) - - # mock upload response - upload_response = Response() - upload_response.status_code = 500 - mocker.patch( - "requests.put", return_value=upload_response, side_effect=upload_side_effect - ) - - # mock download response - download_response = Response() - download_response.status_code = 200 - download_response._content = ( - b"--ffc537c8d0da305912d6440576ac0d75\r\n" - b'Content-Disposition: form-data; name="file"; ' - b'filename="healthz_tmp-file.txt"\r\n\r\n' - b"Test\nTest\n\r\n" - b"--ffc537c8d0da305912d6440576ac0d75--\r\n" - ) - mocker.patch("requests.get", return_value=download_response) - - # change object name of existing stat response - stat_response = Minio.stat_object.return_value - stat_response._object_name = "healthz_tmp-object_" + uuid - - # update responses compatible with media storage service health check - Minio.stat_object.side_effect = [stat_response, None] - - @pytest.fixture def roll_back_migrations(): """Roll back database migrations to ensure unapplied migrations exist.""" diff --git a/caluma/caluma_core/health_checks.py b/caluma/caluma_core/health_checks.py index c8e13d8e4..8d0673970 100644 --- a/caluma/caluma_core/health_checks.py +++ b/caluma/caluma_core/health_checks.py @@ -1,13 +1,11 @@ -import uuid from io import StringIO +from uuid import uuid4 -import requests from django.conf import settings from django.core import management from watchman.decorators import check from caluma.caluma_form import storage_clients -from caluma.caluma_logging.models import AccessLog @check @@ -29,87 +27,13 @@ def _check_pending_migrations(db_name): @check def _check_media_storage_service(): - """Check media storage service connection and operations.""" - # generate object name - storage_client = storage_clients.client - prefix = "healthz_tmp-object_" - object_name = prefix + str(uuid.uuid4()) - - try: - # request upload url and upload - upload_url = storage_client.upload_url(object_name) - - filename = "healthz_tmp-file.txt" - content = "Test\nTest\n" - files = {"file": (filename, content)} - upload_request = requests.put(upload_url, files=files) - assert upload_request.status_code == 200 - - # stat object - stat = storage_client.stat_object(object_name) - assert stat.object_name == object_name - - # request download url and download file - download_url = storage_client.download_url(object_name) - - download_request = requests.get(download_url) - assert download_request.status_code == 200 - assert filename in download_request.text and content in download_request.text - - # remove object - storage_client.remove_object(object_name) - assert not storage_client.stat_object(object_name, suppress_warning=True) - - return {"ok": True} - - finally: - storage_client.remove_object(object_name) - - -@check -def _check_models(database): - """Check model instantion and operations on database.""" - log = None - try: - # Instantiate model - log = AccessLog.objects.create(status_code=22) - assert ( - log in AccessLog.objects.all() - ), "Failed to save model instance to database." - - # Retrieve object - retrieved_log = AccessLog.objects.get(pk=log.pk) - assert log == retrieved_log, "Unexpected object retrieved from database." - assert ( - retrieved_log.status_code == log.status_code - ), "Unexpected object data retrieved from database." - - # Update object - log.status_code = 42 - log.save() - retrieved_log = AccessLog.objects.get(pk=log.pk) - assert log == retrieved_log, "Unexpected object retrieved from database." - assert ( - retrieved_log.status_code == log.status_code - ), "Unexpected object data retrieved from database." - - # Remove object - log.delete() - log = None - assert ( - log not in AccessLog.objects.all() - ), "Failed to delete model instance from database." - - return {database: {"ok": True}} - - finally: - # Cleanup - if log: # pragma: no cover - log.delete() + """Check media storage service connectivity.""" + assert not storage_clients.client.client.bucket_exists(str(uuid4())) + return {"ok": True} def check_media_storage_service(): - """Check media storage service connection and operations.""" + """Check media storage service connectivity.""" # Check if media storage service is available if not storage_clients.client: return {"media storage service (not configured)": {"ok": True}} @@ -123,11 +47,3 @@ def check_migrations(): checked_databases = [_check_pending_migrations(db_name) for db_name in databases] return {"database migrations": checked_databases} - - -def check_models(): - """Check model instantiation.""" - databases = sorted(settings.DATABASES) - checked_databases = [_check_models(db_name) for db_name in databases] - - return {"database models": checked_databases} diff --git a/caluma/caluma_core/tests/__snapshots__/test_health_checks.ambr b/caluma/caluma_core/tests/__snapshots__/test_health_checks.ambr index 3fe395a3d..b8b44928b 100644 --- a/caluma/caluma_core/tests/__snapshots__/test_health_checks.ambr +++ b/caluma/caluma_core/tests/__snapshots__/test_health_checks.ambr @@ -15,13 +15,6 @@ }), }), ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': False, - }), - }), - ]), 'databases': list([ dict({ 'default': dict({ @@ -50,13 +43,6 @@ }), }), ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), 'databases': list([ dict({ 'default': dict({ @@ -85,13 +71,6 @@ }), }), ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), 'databases': list([ dict({ 'default': dict({ @@ -120,188 +99,6 @@ }), }), ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'databases': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'media storage service': dict({ - 'ok': True, - }), - }) -# --- -# name: test_db_model_accessible - dict({ - 'caches': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database migrations': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'databases': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'media storage service': dict({ - 'ok': True, - }), - }) -# --- -# name: test_db_model_read_error - dict({ - 'caches': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database migrations': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': False, - }), - }), - ]), - 'databases': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'media storage service': dict({ - 'ok': True, - }), - }) -# --- -# name: test_db_model_write_error - dict({ - 'caches': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database migrations': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': False, - }), - }), - ]), - 'databases': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'media storage service': dict({ - 'ok': True, - }), - }) -# --- -# name: test_minio_connection_broken - dict({ - 'caches': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database migrations': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'databases': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'media storage service': dict({ - 'ok': False, - }), - }) -# --- -# name: test_minio_connection_working - dict({ - 'caches': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database migrations': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), 'databases': list([ dict({ 'default': dict({ @@ -314,7 +111,7 @@ }), }) # --- -# name: test_minio_not_configured +# name: test_storage_service_not_configured dict({ 'caches': list([ dict({ @@ -330,13 +127,6 @@ }), }), ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), 'databases': list([ dict({ 'default': dict({ @@ -349,73 +139,3 @@ }), }) # --- -# name: test_minio_read_error - dict({ - 'caches': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database migrations': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'databases': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'media storage service': dict({ - 'ok': False, - }), - }) -# --- -# name: test_minio_write_error - dict({ - 'caches': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database migrations': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'database models': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'databases': list([ - dict({ - 'default': dict({ - 'ok': True, - }), - }), - ]), - 'media storage service': dict({ - 'ok': False, - }), - }) -# --- diff --git a/caluma/caluma_core/tests/test_health_checks.py b/caluma/caluma_core/tests/test_health_checks.py index afd51009e..8d6408997 100644 --- a/caluma/caluma_core/tests/test_health_checks.py +++ b/caluma/caluma_core/tests/test_health_checks.py @@ -1,3 +1,4 @@ +import pytest from django.core import management from django.urls import reverse from watchman import settings as watchman_settings @@ -19,7 +20,6 @@ def test_db_connection_working( # assert database checks pass json_response = response.json() assert json_response["databases"] == [{"default": {"ok": True}}] - assert json_response["database models"] == [{"default": {"ok": True}}] assert json_response["database migrations"] == [{"default": {"ok": True}}] assert response.status_code == 200 @@ -50,7 +50,6 @@ def test_db_connection_broken( # assert database checks fail json_response = response.json() assert json_response["databases"] == [{"default": {"ok": False}}] - assert json_response["database models"] == [{"default": {"ok": False}}] assert json_response["database migrations"] == [{"default": {"ok": False}}] assert response.status_code == watchman_settings.WATCHMAN_ERROR_CODE @@ -62,137 +61,34 @@ def test_db_connection_broken( ) -def test_db_model_accessible( - disable_logs, - track_errors, - capsys, - client, - snapshot, - minio_mock_working, - db_privileges_working, -): - """Test /healthz/ endpoint response when necessary DB tables are accessible. - - Provide a database connected through a new test user with the necessary - privileges to access the tables needed in the health checks. - Passes database models health check and should be able to access the - necessary model table. - """ - # get /healthz/ response and compare to previous snapshot - response = client.get(reverse("healthz")) - snapshot.assert_match(response.json()) - - # assert database models check passes - assert response.json()["database models"] == [{"default": {"ok": True}}] - assert response.status_code == 200 - - # assert exception type - *_, err = capsys.readouterr() - assert not err - - -def test_db_model_read_error( - disable_logs, - track_errors, - capsys, - client, - snapshot, - minio_mock_working, - db_read_error, -): - """Test /healthz/ endpoint response when necessary DB tables are inaccessible. - - Provide a database connected through a new test user without the necessary - privileges to access the tables needed in the health checks. - Affects the database models health check when trying to access the - specified model table and triggers a read error due to lack of privileges. - """ - # get /healthz/ response and compare to previous snapshot - response = client.get(reverse("healthz")) - snapshot.assert_match(response.json()) - - # assert database models check fails - assert response.json()["database models"] == [{"default": {"ok": False}}] - assert response.status_code == watchman_settings.WATCHMAN_ERROR_CODE - - # assert exception type - *_, err = capsys.readouterr() - assert "django.db.utils.ProgrammingError: permission denied" in err - - -def test_db_model_write_error( - disable_logs, - track_errors, - capsys, - client, - snapshot, - minio_mock_working, - db_write_error, -): - """Test /healthz/ endpoint response when necessary DB tables are read-only. - - Provide a database connected through a new test user without the necessary - privileges to write to the tables needed in the health checks. - Affects the database models health check when trying to create, update or - delete an object for the specified model table and triggers write error - due to lack of privileges. - """ - # get /healthz/ response and compare to previous snapshot - response = client.get(reverse("healthz")) - snapshot.assert_match(response.json()) - - # assert database models check fails - assert response.json()["database models"] == [{"default": {"ok": False}}] - assert response.status_code == watchman_settings.WATCHMAN_ERROR_CODE - - # assert exception type - *_, err = capsys.readouterr() - assert "django.db.utils.ProgrammingError: permission denied" in err - - -def test_minio_connection_working( - disable_logs, track_errors, capsys, client, snapshot, db, minio_mock_working +@pytest.mark.parametrize("success", [True, False]) +def test_storage_service_connectivity( + success, disable_logs, track_errors, capsys, client, db, minio_mock_working ): """Test /healthz/ endpoint response when minio connection is working. Provide minio mock that passes the media storage service health check. """ - # get /healthz/ response and compare to previous snapshot - response = client.get(reverse("healthz")) - snapshot.assert_match(response.json()) + if not success: + minio_mock_working.side_effect = Exception("foo bar") - # assert media storage service check passes - assert response.json()["media storage service"] == {"ok": True} - assert response.status_code == 200 - - # assert exception type - *_, err = capsys.readouterr() - assert not err - - -def test_minio_connection_broken( - disable_logs, track_errors, capsys, client, snapshot, db, minio_broken_connection -): - """Test /healthz/ endpoint response when minio connection is broken. - - Provide new minio client with faulty connection information. - Affects media storage service health check and should trigger - connection error. - """ # get /healthz/ response and compare to previous snapshot response = client.get(reverse("healthz")) - snapshot.assert_match(response.json()) - # assert media storage service check fails - assert response.json()["media storage service"] == {"ok": False} - assert response.status_code == watchman_settings.WATCHMAN_ERROR_CODE + # assert media storage service check passes + assert response.json()["media storage service"] == {"ok": success} + assert ( + response.status_code == 200 + if success + else watchman_settings.WATCHMAN_ERROR_CODE + ) # assert exception type *_, err = capsys.readouterr() - assert "urllib3.exceptions.MaxRetryError" in err + assert not err if success else err -def test_minio_not_configured( +def test_storage_service_not_configured( disable_logs, track_errors, capsys, client, snapshot, db, minio_not_configured ): """Test /healthz/ endpoint response when minio is not configured. @@ -215,49 +111,6 @@ def test_minio_not_configured( assert not err -def test_minio_read_error( - disable_logs, track_errors, capsys, client, snapshot, db, minio_mock_read_error -): - """Test /healthz/ endpoint response when minio receives a read error. - - Mocks minio read functions to produce an error. - Affects media storage service health check file download and stat_object - functions. - """ - # get /healthz/ response and compare to previous snapshot - response = client.get(reverse("healthz")) - snapshot.assert_match(response.json()) - - # assert media storage service check fails - assert response.json()["media storage service"] == {"ok": False} - assert response.status_code == watchman_settings.WATCHMAN_ERROR_CODE - - # assert exception type - *_, err = capsys.readouterr() - assert "Exception: stat_object failed" in err - - -def test_minio_write_error( - disable_logs, track_errors, capsys, client, snapshot, db, minio_mock_write_error -): - """Test /healthz/ endpoint response when minio receives a write error. - - Mocks minio write functions to produce an error. - Affects media storage service health check file upload function. - """ - # get /healthz/ response and compare to previous snapshot - response = client.get(reverse("healthz")) - snapshot.assert_match(response.json()) - - # assert media storage service check fails - assert response.json()["media storage service"] == {"ok": False} - assert response.status_code == watchman_settings.WATCHMAN_ERROR_CODE - - # assert exception type - *_, err = capsys.readouterr() - assert "requests.exceptions.RequestException: PUT request failed" in err - - def test_db_migrations_applied( disable_logs, track_errors, capsys, client, snapshot, db, minio_mock_working ): @@ -286,9 +139,9 @@ def test_db_migrations_applied( def test_db_migrations_not_applied( + capsys, disable_logs, track_errors, - capsys, client, snapshot, db, diff --git a/caluma/settings/django.py b/caluma/settings/django.py index 76c6facc0..56684a095 100644 --- a/caluma/settings/django.py +++ b/caluma/settings/django.py @@ -200,7 +200,6 @@ def parse_admins(admins): # pragma: no cover default=( "caluma.caluma_core.health_checks.check_migrations", "caluma.caluma_core.health_checks.check_media_storage_service", - "caluma.caluma_core.health_checks.check_models", "watchman.checks.caches", "watchman.checks.databases", ),