From 1b8818f66a642f4853a7fb04302eb23565d1926d Mon Sep 17 00:00:00 2001 From: nannan00 <17491932+nannan00@users.noreply.github.com> Date: Thu, 29 Feb 2024 10:45:26 +0800 Subject: [PATCH] test: test url_has_allowed_host_and_scheme --- .pre-commit-config.yaml | 2 +- src/bk-login/bklogin/authentication/utils.py | 13 +- src/bk-login/tests/authentication/__init__.py | 10 ++ .../tests/authentication/test_utils.py | 121 ++++++++++++++++++ 4 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 src/bk-login/tests/authentication/__init__.py create mode 100644 src/bk-login/tests/authentication/test_utils.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f140e2306..34659f1a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -86,7 +86,7 @@ repos: name: ruff language: python types: [python] - entry: ruff --config=src/bk-login/pyproject.toml --fix --no-cache + entry: ruff --config=src/bk-login/pyproject.toml --unsafe-fixes --fix --no-cache files: src/bk-login/ - id: mypy name: mypy diff --git a/src/bk-login/bklogin/authentication/utils.py b/src/bk-login/bklogin/authentication/utils.py index c5550bba9..906160f1a 100644 --- a/src/bk-login/bklogin/authentication/utils.py +++ b/src/bk-login/bklogin/authentication/utils.py @@ -9,9 +9,9 @@ specific language governing permissions and limitations under the License. """ import unicodedata +from urllib.parse import urlparse from django.http.request import split_domain_port, validate_host -from django.utils.http import _urlparse # Copied from django.utils.http.url_has_allowed_host_and_scheme() @@ -45,13 +45,18 @@ def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): # Copied from django.utils.http._url_has_allowed_host_and_scheme() # but additional support for wildcard domain matching. 
+# 支持匹配: +# (1) * 匹配任意域名 +# (2) 泛域名匹配,比如 .example.com 可匹配 foo.example.com、example.com、foo.example.com:8000、example.com:8080 +# (3) 精确域名匹配,比如 example.com 可匹配 example.com、example.com:8000 +# (4) 精确域名&端口匹配,比如 example.com:9000 只可匹配 example.com:9000 def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): # Chrome considers any URL with more than two slashes to be absolute, but # urlparse is not so flexible. Treat any url with three slashes as unsafe. if url.startswith("///"): return False try: - url_info = _urlparse(url) + url_info = urlparse(url) except ValueError: # e.g. invalid IPv6 addresses return False # Forbid URLs like http:///example.com - with a scheme, but without a hostname. @@ -70,12 +75,16 @@ def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): if not url_info.scheme and url_info.netloc: scheme = "http" valid_schemes = ["https"] if require_https else ["http", "https"] + + # Check if the scheme is valid. if scheme and scheme not in valid_schemes: return False + # Check if netloc is in allowed_hosts if not url_info.netloc or url_info.netloc in allowed_hosts: return True + # Check wildcard domain matching # Copied from django.http.request.HttpRequest.get_host() domain, port = split_domain_port(url_info.netloc) return domain and validate_host(domain, allowed_hosts) diff --git a/src/bk-login/tests/authentication/__init__.py b/src/bk-login/tests/authentication/__init__.py new file mode 100644 index 000000000..1060b7bf4 --- /dev/null +++ b/src/bk-login/tests/authentication/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. +""" diff --git a/src/bk-login/tests/authentication/test_utils.py b/src/bk-login/tests/authentication/test_utils.py new file mode 100644 index 000000000..b59081dbe --- /dev/null +++ b/src/bk-login/tests/authentication/test_utils.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +TencentBlueKing is pleased to support the open source community by making 蓝鲸智云-用户管理(Bk-User) available. +Copyright (C) 2017-2021 THL A29 Limited, a Tencent company. All rights reserved. +Licensed under the MIT License (the "License"); you may not use this file except in compliance with the License. +You may obtain a copy of the License at http://opensource.org/licenses/MIT +Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+""" +import pytest +from bklogin.authentication.utils import url_has_allowed_host_and_scheme + + +class TestURLHasAllowedHostAndScheme: + @pytest.mark.parametrize( + "bad_url", + [ + "http://example.com", + "http:///example.com", + "https://example.com", + "ftp://example.com", + r"\\example.com", + r"\\\example.com", + r"/\\/example.com", + r"\\//example.com", + r"/\/example.com", + r"\/example.com", + r"/\example.com", + r"http:/\//example.com", + r"http:\/example.com", + r"http:/\example.com", + 'javascript:alert("XSS")', + "\njavascript:alert(x)", + "java\nscript:alert(x)", + "\x08//example.com", + r"http://otherserver\@example.com", + r"http:\\testserver\@example.com", + r"http://testserver\me:pass@example.com", + r"http://testserver\@example.com", + r"http:\\testserver\confirm\me@example.com", + "http:999999999", + "ftp:9999999999", + "\n", + "http://[2001:cdba:0000:0000:0000:0000:3257:9652/", + "http://2001:cdba:0000:0000:0000:0000:3257:9652]/", + ], + ) + def test_bad_urls(self, bad_url): + assert not url_has_allowed_host_and_scheme(bad_url, allowed_hosts={"testserver", "testserver2"}) + + @pytest.mark.parametrize( + "good_url", + [ + "/view/?param=http://example.com", + "/view/?param=https://example.com", + "/view?param=ftp://example.com", + "view/?param=//example.com", + "https://testserver/", + "HTTPS://testserver/", + "//testserver/", + "http://testserver/confirm?email=me@example.com", + "/url%20with%20spaces/", + "path/http:2222222222", + ], + ) + def test_good_urls(self, good_url): + assert url_has_allowed_host_and_scheme(good_url, allowed_hosts={"testserver", "otherserver"}) + + def test_basic_auth(self): + assert url_has_allowed_host_and_scheme(r"http://user:pass@testserver/", allowed_hosts={"user:pass@testserver"}) + + def test_no_allowed_hosts(self): + assert url_has_allowed_host_and_scheme("/confirm/me@example.com", allowed_hosts=None) + assert not url_has_allowed_host_and_scheme(r"http://testserver\@example.com", allowed_hosts=None) + + def 
test_allowed_hosts_str(self): + assert url_has_allowed_host_and_scheme("http://good.com/good", allowed_hosts="good.com") + assert not url_has_allowed_host_and_scheme("http://good.co/evil", allowed_hosts="good.com") + + @pytest.mark.parametrize( + ("url", "expected"), + [ + ("https://example.com/p", True), + ("HTTPS://example.com/p", True), + ("/view/?param=http://example.com", True), + ("http://example.com/p", False), + ("ftp://example.com/p", False), + ("//example.com/p", False), + ], + ) + def test_scheme_param_urls(self, url, expected): + assert url_has_allowed_host_and_scheme(url, allowed_hosts={"example.com"}, require_https=True) == expected + + @pytest.mark.parametrize( + ("url", "allowed_hosts", "expected"), + [ + # * 匹配任意域名 + ("https://wwww.example.com", "*", True), + ("https://wwww.example1.com/p1/p2", "*", True), + ("https://[2001:cdba:0000:0000:0000:0000:3257:9652/", "*", False), + # 泛域名匹配 + ("https://foo.example.com", ".example.com", True), + ("https://example.com", ".example.com", True), + ("https://www.foo.example.com", ".example.com", True), + ("https://foo.example.com:1111", ".example.com", True), + ("https://foo.example.com:1111/p1/p2", ".example.com", True), + ("https://foo.example1.com", ".example.com", False), + # 精确域名匹配 + ("https://example.com", "example.com", True), + ("https://example.com:1111", "example.com", True), + ("https://foo.example.com", "example.com", False), + # 精确域名&端口匹配 + ("https://example.com:1111", "example.com:1111", True), + ("https://example.com", "example.com:1111", False), + ("https://example.com:2222", "example.com:1111", False), + ("https://foo.example.com", "example.com:1111", False), + ], + ) + def test_wildcard_domain(self, url, allowed_hosts, expected): + assert url_has_allowed_host_and_scheme(url, allowed_hosts=allowed_hosts, require_https=True) == expected