From ce3d3b50d68fd8154e3cb48f82fc1adbdb5a7723 Mon Sep 17 00:00:00 2001 From: Sivarajan Narayanan Date: Wed, 27 Nov 2024 09:02:36 +0530 Subject: [PATCH 1/4] fix_warm_up_cache --- superset/tasks/cache.py | 25 +++++++++++- superset/tasks/utils.py | 2 +- tests/integration_tests/tasks/test_cache.py | 44 +++++++++++++++++---- tests/integration_tests/tasks/test_utils.py | 19 +++++++-- 4 files changed, 78 insertions(+), 12 deletions(-) diff --git a/superset/tasks/cache.py b/superset/tasks/cache.py index 1509b0d0624aa..94bc3cbb9c6b8 100644 --- a/superset/tasks/cache.py +++ b/superset/tasks/cache.py @@ -18,6 +18,7 @@ from typing import Any, Optional, Union from urllib import request from urllib.error import URLError +from urllib.parse import urlparse from celery.beat import SchedulingError from celery.utils.log import get_task_logger @@ -47,6 +48,23 @@ def get_payload(chart: Slice, dashboard: Optional[Dashboard] = None) -> dict[str return payload +def is_secure_url(url: str) -> bool: + """ + Validates if a URL is secure (uses HTTPS). + + :param url: The URL to validate. + :return: True if the URL uses HTTPS (secure), False if it uses HTTP (non-secure). + """ + try: + parsed_url = urlparse(url) + # Return True for HTTPS, False for HTTP + return parsed_url.scheme == "https" + except ValueError as exception: + # Log a warning with detailed context + logger.warning("Failed to parse URL '%s': %s", url, str(exception)) + return False + + class Strategy: # pylint: disable=too-few-public-methods """ A cache warm up strategy. @@ -220,10 +238,15 @@ def fetch_url(data: str, headers: dict[str, str]) -> dict[str, str]: """ result = {} try: + url = get_url_path("ChartRestApi.warm_up_cache") + + if is_secure_url(url): + logger.info("URL '%s' is secure. Adding Referer header.", url) + headers.update({"Referer": url}) + # Fetch CSRF token for API request headers.update(fetch_csrf_token(headers)) - url = get_url_path("ChartRestApi.warm_up_cache") logger.info("Fetching %s with payload %s", url, data) req = request.Request( url, data=bytes(data, "utf-8"), headers=headers, method="PUT" diff --git a/superset/tasks/utils.py b/superset/tasks/utils.py index 6fc799c4abc2c..e63e3c358297e 100644 --- a/superset/tasks/utils.py +++ b/superset/tasks/utils.py @@ -133,7 +133,7 @@ def fetch_csrf_token( data = json.loads(body) res = {"X-CSRF-Token": data["result"]} if session_cookie is not None: - res["Cookie"] = session_cookie + res["Cookie"] = f"session={session_cookie}" return res logger.error("Error fetching CSRF token, status code: %s", response.status) diff --git a/tests/integration_tests/tasks/test_cache.py b/tests/integration_tests/tasks/test_cache.py index 6e8d3ffe03b4d..c8a055d79c7c9 100644 --- a/tests/integration_tests/tasks/test_cache.py +++ b/tests/integration_tests/tasks/test_cache.py @@ -22,17 +22,32 @@ @pytest.mark.parametrize( - "base_url", + "base_url, expected_referer", [ - "http://base-url", - "http://base-url/", + ("http://base-url", None), + ("http://base-url/", None), + ("https://base-url", "https://base-url/api/v1/chart/warm_up_cache"), + ("https://base-url/", "https://base-url/api/v1/chart/warm_up_cache"), + ], + ids=[ + "Without trailing slash (HTTP)", + "With trailing slash (HTTP)", + "Without trailing slash (HTTPS)", + "With trailing slash (HTTPS)", ], - ids=["Without trailing slash", "With trailing slash"], ) @mock.patch("superset.tasks.cache.fetch_csrf_token") @mock.patch("superset.tasks.cache.request.Request") @mock.patch("superset.tasks.cache.request.urlopen") -def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_url): +@mock.patch("superset.tasks.cache.is_secure_url") +def test_fetch_url( + mock_is_secure_url, + mock_urlopen, + mock_request_cls, + mock_fetch_csrf_token, + base_url, + expected_referer, +): from superset.tasks.cache import fetch_url mock_request = mock.MagicMock() @@ -41,6 +56,9 @@ def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_u mock_urlopen.return_value = mock.MagicMock() mock_urlopen.return_value.code = 200 + # Mock the URL validation to return True for HTTPS and False for HTTP + mock_is_secure_url.return_value = base_url.startswith("https") + initial_headers = {"Cookie": "cookie", "key": "value"} csrf_headers = initial_headers | {"X-CSRF-Token": "csrf_token"} mock_fetch_csrf_token.return_value = csrf_headers @@ -51,13 +69,25 @@ def test_fetch_url(mock_urlopen, mock_request_cls, mock_fetch_csrf_token, base_u result = fetch_url(data, initial_headers) - assert data == result["success"] + # Verify that the Referer header is added correctly for HTTPS URLs + if expected_referer: + assert csrf_headers["Referer"] == expected_referer + + expected_url = ( + f"{base_url}/api/v1/chart/warm_up_cache" + if not base_url.endswith("/") + else f"{base_url}api/v1/chart/warm_up_cache" + ) + mock_fetch_csrf_token.assert_called_once_with(initial_headers) + mock_request_cls.assert_called_once_with( - "http://base-url/api/v1/chart/warm_up_cache", + expected_url, # Use the dynamic URL based on base_url data=data_encoded, headers=csrf_headers, method="PUT", ) # assert the same Request object is used mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY) + + assert data == result["success"] diff --git a/tests/integration_tests/tasks/test_utils.py b/tests/integration_tests/tasks/test_utils.py index b1213b78c85a0..29e5f38319cb9 100644 --- a/tests/integration_tests/tasks/test_utils.py +++ b/tests/integration_tests/tasks/test_utils.py @@ -26,8 +26,15 @@ [ "http://base-url", "http://base-url/", + "https://base-url", + "https://base-url/", + ], + ids=[ + "Without trailing slash (HTTP)", + "With trailing slash (HTTP)", + "Without trailing slash (HTTPS)", + "With trailing slash (HTTPS)", ], - ids=["Without trailing slash", "With trailing slash"], ) @mock.patch("superset.tasks.cache.request.Request") @mock.patch("superset.tasks.cache.request.urlopen") @@ -52,13 +59,19 @@ def test_fetch_csrf_token(mock_urlopen, mock_request_cls, base_url, app_context) result_headers = fetch_csrf_token(headers) + expected_url = ( + f"{base_url}/api/v1/security/csrf_token/" + if not base_url.endswith("/") + else f"{base_url}api/v1/security/csrf_token/" + ) + mock_request_cls.assert_called_with( - "http://base-url/api/v1/security/csrf_token/", + expected_url, headers=headers, method="GET", ) assert result_headers["X-CSRF-Token"] == "csrf_token" - assert result_headers["Cookie"] == "new_session_cookie" + assert result_headers["Cookie"] == "session=new_session_cookie" # Updated assertion # assert the same Request object is used mock_urlopen.assert_called_once_with(mock_request, timeout=mock.ANY) From 1b47e512e7e83d4cff962214c8ed24b37ecee07b Mon Sep 17 00:00:00 2001 From: Sivarajan Narayanan Date: Wed, 27 Nov 2024 10:40:52 +0530 Subject: [PATCH 2/4] fix_failing_test_case --- tests/integration_tests/tasks/test_cache.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/integration_tests/tasks/test_cache.py b/tests/integration_tests/tasks/test_cache.py index c8a055d79c7c9..9a8925044c7c6 100644 --- a/tests/integration_tests/tasks/test_cache.py +++ b/tests/integration_tests/tasks/test_cache.py @@ -61,6 +61,12 @@ def test_fetch_url( initial_headers = {"Cookie": "cookie", "key": "value"} csrf_headers = initial_headers | {"X-CSRF-Token": "csrf_token"} + + # Conditionally add the Referer header and assert its presence + if expected_referer: + csrf_headers = csrf_headers | {"Referer": expected_referer} + assert csrf_headers["Referer"] == expected_referer + mock_fetch_csrf_token.return_value = csrf_headers app.config["WEBDRIVER_BASEURL"] = base_url @@ -69,10 +75,6 @@ def test_fetch_url( result = fetch_url(data, initial_headers) - # Verify that the Referer header is added correctly for HTTPS URLs - if expected_referer: - assert csrf_headers["Referer"] == expected_referer - expected_url = ( f"{base_url}/api/v1/chart/warm_up_cache" if not base_url.endswith("/") From 5c6106ddb665be3426022c16d5802241426ac16a Mon Sep 17 00:00:00 2001 From: Sivarajan Narayanan Date: Wed, 27 Nov 2024 11:02:11 +0530 Subject: [PATCH 3/4] fix_failing_precommit_case --- tests/integration_tests/tasks/test_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/tasks/test_cache.py b/tests/integration_tests/tasks/test_cache.py index 9a8925044c7c6..368cb1ebf0afb 100644 --- a/tests/integration_tests/tasks/test_cache.py +++ b/tests/integration_tests/tasks/test_cache.py @@ -61,12 +61,12 @@ def test_fetch_url( initial_headers = {"Cookie": "cookie", "key": "value"} csrf_headers = initial_headers | {"X-CSRF-Token": "csrf_token"} - + # Conditionally add the Referer header and assert its presence if expected_referer: csrf_headers = csrf_headers | {"Referer": expected_referer} assert csrf_headers["Referer"] == expected_referer - + mock_fetch_csrf_token.return_value = csrf_headers app.config["WEBDRIVER_BASEURL"] = base_url From e6a2aa52237fffb8ad76924f30ed0218c7932e4c Mon Sep 17 00:00:00 2001 From: nsivarajan <117266407+nsivarajan@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:46:58 +0530 Subject: [PATCH 4/4] Update superset/tasks/utils.py Co-authored-by: Pat Heard --- superset/tasks/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/tasks/utils.py b/superset/tasks/utils.py index e63e3c358297e..5e3bc148082b9 100644 --- a/superset/tasks/utils.py +++ b/superset/tasks/utils.py @@ -133,7 +133,7 @@ def fetch_csrf_token( data = json.loads(body) res = {"X-CSRF-Token": data["result"]} if session_cookie is not None: - res["Cookie"] = f"session={session_cookie}" + res["Cookie"] = f"{session_cookie_name}={session_cookie}" return res logger.error("Error fetching CSRF token, status code: %s", response.status)