From 3e1366a43d0df5e1d4260f00b769816ed0250c56 Mon Sep 17 00:00:00 2001 From: Jared Deckard Date: Thu, 22 Feb 2024 08:56:33 -0600 Subject: [PATCH] Restrict URL userinfo to NWG RFC 3986 --- src/marshmallow/validate.py | 4 ++-- tests/test_validate.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/marshmallow/validate.py b/src/marshmallow/validate.py index 708fed85a..9fb16c86d 100644 --- a/src/marshmallow/validate.py +++ b/src/marshmallow/validate.py @@ -131,9 +131,9 @@ def _regex_generator( # this is validated separately against allowed schemes, so in the regex # we simply want to capture its existence r"(?:[a-z0-9\.\-\+]*)://", - # basic_auth, for URLs encoding a username:password + # userinfo, for URLs encoding authentication # e.g. 'ftp://foo:bar@ftp.example.org/' - r"(?:[^:@]+?(:[^:@]*?)?@|)", + r"(?:(?:[a-z0-9\-._~!$&'()*+,;=:]|%[0-9a-f]{2})*@)?", # netloc, the hostname/domain part of the URL plus the optional port r"(?:", "|".join(hostname_variants), diff --git a/tests/test_validate.py b/tests/test_validate.py index 0d7c56cfa..fcda4e816 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -29,6 +29,9 @@ "http://www.example.com:8000/foo", "http://user@example.com", "http://user:pass@example.com", + "http://:pass@example.com", + "http://@example.com", + "http://AZaz09-._~%2A!$&'()*+,;=:@example.com", ], ) def test_url_absolute_valid(valid_url): @@ -58,6 +61,12 @@ def test_url_absolute_valid(valid_url): " ", "", None, + "http://user@pass@example.com", + "http://@pass@example.com", + "http://@@example.com", + "http://^@example.com", + "http://%0G@example.com", + "http://%@example.com", ], ) def test_url_absolute_invalid(invalid_url):