From 901a91c3433161094b1e83f5b261773d22eb6e7b Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 11 Dec 2024 17:28:41 +0530 Subject: [PATCH 01/10] gh-127794: Validate header name according rfc-5322 and allow only printable ascii characters --- Lib/email/message.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Lib/email/message.py b/Lib/email/message.py index a58afc5fe5f68e..81750d248df281 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -564,6 +564,14 @@ def add_header(self, _name, _value, **_params): msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ + # Validate header name according to RFC 5322 + if not _name or ' ' in _name or '\t' in _name or ':' in _name: + raise ValueError(f"Invalid header field name {_name!r}") + + # Only allow printable ASCII characters + if any(ord(c) < 33 or ord(c) > 126 for c in _name): + raise ValueError(f"Header field name {_name!r} contains invalid characters") + parts = [] for k, v in _params.items(): if v is None: From 5be0eaa21d96358a1e9c5ec078b0c2b74868300e Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 11 Dec 2024 17:33:35 +0530 Subject: [PATCH 02/10] Add test case --- Lib/test/test_email/test_email.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index abe9ef2e94409f..f9e0cbf19de87e 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -728,6 +728,23 @@ def test_nonascii_add_header_with_tspecial(self): "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt", msg['Content-Disposition']) + def test_invalid_headers(self): + invalid_headers = [ + ('Invalid Header', 'Contains space'), + ('Tab\tHeader', 'Contains tab'), + ('Colon:Header', 'Contains colon'), + ('', 'Empty name'), + ('Header\x7F', 'Non-ASCII character'), + ('Header\x1F', 'Control character'), + (' LeadingSpace', 'Starts with space'), + ('TrailingSpace ', 'Ends with space'), + ] + for name, value in invalid_headers: + with self.subTest(header=name): + with self.assertRaises(ValueError) as cm: + self.message.add_header(name, value) + self.assertIn("Invalid header", str(cm.exception)) + def test_binary_quopri_payload(self): for charset in ('latin-1', 'ascii'): msg = Message() From 6ae62731f881876b125e537cfe91833cdaad5106 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 11 Dec 2024 17:38:48 +0530 Subject: [PATCH 03/10] Add some more tests --- Lib/test/test_email/test_email.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index f9e0cbf19de87e..e0859809cf6568 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -730,20 +730,26 @@ def test_nonascii_add_header_with_tspecial(self): def test_invalid_headers(self): invalid_headers = [ - ('Invalid Header', 'Contains space'), - ('Tab\tHeader', 'Contains tab'), - ('Colon:Header', 'Contains colon'), + ('Invalid Header', 'contains space'), + ('Tab\tHeader', 'contains tab'), + ('Colon:Header', 'contains colon'), ('', 'Empty name'), - ('Header\x7F', 'Non-ASCII character'), - ('Header\x1F', 'Control character'), - (' LeadingSpace', 'Starts with space'), - ('TrailingSpace ', 'Ends with space'), + (' LeadingSpace', 'starts with space'), + ('TrailingSpace ', 'ends with space'), ] for name, value in invalid_headers: - with self.subTest(header=name): - with self.assertRaises(ValueError) as cm: - self.message.add_header(name, value) - self.assertIn("Invalid header", str(cm.exception)) + with self.assertRaises(ValueError) as cm: + Message().add_header(name, value) + self.assertIn("Invalid header field name", str(cm.exception)) + + invalid_headers = [ + ('Header\x7F', 'Non-ASCII character'), + ('Header\x1F', 'control character'), + ] + for name, value in invalid_headers: + with self.assertRaises(ValueError) as cm: + Message().add_header(name, value) + self.assertIn(f"Header field name {name!r} contains invalid characters", str(cm.exception)) def test_binary_quopri_payload(self): for charset in ('latin-1', 'ascii'): From a7d1c0cdb8496fc31851e01056827be5c0dbd88c Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 11 Dec 2024 17:42:02 +0530 Subject: [PATCH 04/10] Fix indentation --- Lib/test/test_email/test_email.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index e0859809cf6568..827b36c36bd3a6 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -743,7 +743,7 @@ def test_invalid_headers(self): self.assertIn("Invalid header field name", str(cm.exception)) invalid_headers = [ - ('Header\x7F', 'Non-ASCII character'), + ('Header\x7F', 'Non-ASCII character'), ('Header\x1F', 'control character'), ] for name, value in invalid_headers: From d04c9a2d3806d3c44d8e4c40d05c9a0295f53619 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Wed, 11 Dec 2024 17:44:54 +0530 Subject: [PATCH 05/10] Add news entry --- .../next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst b/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst new file mode 100644 index 00000000000000..c5ed88bd2bf61b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-11-17-44-36.gh-issue-127794.VwmRsp.rst @@ -0,0 +1,2 @@ +The :meth:`email.message.Message.add_header` method now validates header +field names according to :rfc:`5322`. From 53bdb4f135e8214c8a1c3586c273059e2102dfe6 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Fri, 13 Dec 2024 17:46:58 +0530 Subject: [PATCH 06/10] gh-127794: Create a separate function for validating header --- Lib/email/_policybase.py | 8 ++++++++ Lib/email/policy.py | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 4b63b97217a835..1af013a5f37e3e 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -90,6 +90,13 @@ def __add__(self, other): """ return self.clone(**other.__dict__) +def validate_header(name, value): + # Validate header name according to RFC 5322 + if not name or ' ' in name or '\t' in name or ':' in name: + raise ValueError(f"Invalid header field name {name!r}") + # Only allow printable ASCII characters + if any(ord(c) < 33 or ord(c) > 126 for c in name): + raise ValueError(f"Header field name {name!r} contains invalid characters") def _append_doc(doc, added_doc): doc = doc.rsplit('\n', 1)[0] @@ -314,6 +321,7 @@ def header_store_parse(self, name, value): """+ The name and value are returned unmodified. """ + validate_header(name, value) return (name, value) def header_fetch_parse(self, name, value): diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 6e109b65011a44..88fe13e56cdde3 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -4,7 +4,7 @@ import re import sys -from email._policybase import Policy, Compat32, compat32, _extend_docstrings +from email._policybase import Policy, Compat32, compat32, _extend_docstrings, validate_header from email.utils import _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry from email.contentmanager import raw_data_manager @@ -140,6 +140,7 @@ def header_store_parse(self, name, value): """ if hasattr(value, 'name') and value.name.lower() == name.lower(): return (name, value) + validate_header(name, value) if isinstance(value, str) and len(value.splitlines())>1: # XXX this error message isn't quite right when we use splitlines # (see issue 22233), but I'm not sure what should happen here. From 31e4f3ecd0f6e9954d823c705c192c2f18d39860 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Fri, 13 Dec 2024 17:54:30 +0530 Subject: [PATCH 07/10] gh-127794: Coverge all the validations into a single regex --- Lib/email/_policybase.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 1af013a5f37e3e..f4140fd67e4332 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -92,7 +92,8 @@ def __add__(self, other): def validate_header(name, value): # Validate header name according to RFC 5322 - if not name or ' ' in name or '\t' in name or ':' in name: + import re + if not re.match(r'^[^\s:]+$', name): raise ValueError(f"Invalid header field name {name!r}") # Only allow printable ASCII characters if any(ord(c) < 33 or ord(c) > 126 for c in name): From fae3664674380bf0a0a936903d6411a793a405aa Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Fri, 13 Dec 2024 17:55:05 +0530 Subject: [PATCH 08/10] gh-127794: Revert the changes --- Lib/email/message.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Lib/email/message.py b/Lib/email/message.py index 81750d248df281..a58afc5fe5f68e 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -564,14 +564,6 @@ def add_header(self, _name, _value, **_params): msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ - # Validate header name according to RFC 5322 - if not _name or ' ' in _name or '\t' in _name or ':' in _name: - raise ValueError(f"Invalid header field name {_name!r}") - - # Only allow printable ASCII characters - if any(ord(c) < 33 or ord(c) > 126 for c in _name): - raise ValueError(f"Header field name {_name!r} contains invalid characters") - parts = [] for k, v in _params.items(): if v is None: From 026f35bca220410f7f514f1e6cb3cda2e142981c Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Fri, 13 Dec 2024 17:58:46 +0530 Subject: [PATCH 09/10] gh-127794: Remove one variable --- Lib/email/_policybase.py | 4 ++-- Lib/email/policy.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index f4140fd67e4332..12211158f4699b 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -90,7 +90,7 @@ def __add__(self, other): """ return self.clone(**other.__dict__) -def validate_header(name, value): +def validate_header(name): # Validate header name according to RFC 5322 import re if not re.match(r'^[^\s:]+$', name): @@ -322,7 +322,7 @@ def header_store_parse(self, name, value): """+ The name and value are returned unmodified. """ - validate_header(name, value) + validate_header(name) return (name, value) def header_fetch_parse(self, name, value): diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 88fe13e56cdde3..48e6b2340e2c61 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -140,7 +140,7 @@ def header_store_parse(self, name, value): """ if hasattr(value, 'name') and value.name.lower() == name.lower(): return (name, value) - validate_header(name, value) + validate_header(name) if isinstance(value, str) and len(value.splitlines())>1: # XXX this error message isn't quite right when we use splitlines # (see issue 22233), but I'm not sure what should happen here. From 8f6f6c34124f06f6554371050de3f2269c377354 Mon Sep 17 00:00:00 2001 From: Srinivas Reddy Thatiparthy Date: Fri, 13 Dec 2024 18:42:46 +0530 Subject: [PATCH 10/10] gh-127794: Update tests --- Lib/email/_policybase.py | 2 +- Lib/test/test_email/test_email.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 12211158f4699b..9558691402d458 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -97,7 +97,7 @@ def validate_header(name): raise ValueError(f"Invalid header field name {name!r}") # Only allow printable ASCII characters if any(ord(c) < 33 or ord(c) > 126 for c in name): - raise ValueError(f"Header field name {name!r} contains invalid characters") + raise ValueError(f"Invalid header field name {name!r}") def _append_doc(doc, added_doc): doc = doc.rsplit('\n', 1)[0] diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 827b36c36bd3a6..ade9d3a2ac28f5 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -740,7 +740,7 @@ def test_invalid_headers(self): for name, value in invalid_headers: with self.assertRaises(ValueError) as cm: Message().add_header(name, value) - self.assertIn("Invalid header field name", str(cm.exception)) + self.assertIn(f"Invalid header field name {name!r}", str(cm.exception)) invalid_headers = [ ('Header\x7F', 'Non-ASCII character'), @@ -749,7 +749,7 @@ def test_invalid_headers(self): for name, value in invalid_headers: with self.assertRaises(ValueError) as cm: Message().add_header(name, value) - self.assertIn(f"Header field name {name!r} contains invalid characters", str(cm.exception)) + self.assertIn(f"Invalid header field name {name!r}", str(cm.exception)) def test_binary_quopri_payload(self): for charset in ('latin-1', 'ascii'):