From 8977ef2686177a4cd2d4e4de2c5d90713222230f Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Mon, 9 Sep 2024 11:29:02 +0800 Subject: [PATCH] Update patch to Python 3.9.20. --- Makefile | 2 +- patch/Python/Python.patch | 3044 +------------------------------------ 2 files changed, 27 insertions(+), 3019 deletions(-) diff --git a/Makefile b/Makefile index 814641b..ad0dd2f 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ BUILD_NUMBER=custom # of a release cycle, as official binaries won't be published. # PYTHON_MICRO_VERSION is the full version number, without any alpha/beta/rc suffix. (e.g., 3.10.0) # PYTHON_VER is the major/minor version (e.g., 3.10) -PYTHON_VERSION=3.9.19 +PYTHON_VERSION=3.9.20 PYTHON_PKG_VERSION=3.9.13 PYTHON_MICRO_VERSION=$(shell echo $(PYTHON_VERSION) | grep -Eo "\d+\.\d+\.\d+") PYTHON_PKG_MICRO_VERSION=$(shell echo $(PYTHON_PKG_VERSION) | grep -Eo "\d+\.\d+\.\d+") diff --git a/patch/Python/Python.patch b/patch/Python/Python.patch index 4d1250b..aa04a3e 100644 --- a/patch/Python/Python.patch +++ b/patch/Python/Python.patch @@ -1,133 +1,3 @@ -diff --git a/Doc/library/asyncio.rst b/Doc/library/asyncio.rst -index a6429394389..ec876f7ab53 100644 ---- a/Doc/library/asyncio.rst -+++ b/Doc/library/asyncio.rst -@@ -56,6 +56,26 @@ - * :ref:`bridge ` callback-based libraries and code - with async/await syntax. - -+.. _asyncio-cli: -+ -+.. rubric:: asyncio REPL -+ -+You can experiment with an ``asyncio`` concurrent context in the REPL: -+ -+.. code-block:: pycon -+ -+ $ python -m asyncio -+ asyncio REPL ... -+ Use "await" directly instead of "asyncio.run()". -+ Type "help", "copyright", "credits" or "license" for more information. -+ >>> import asyncio -+ >>> await asyncio.sleep(10, result='hello') -+ 'hello' -+ -+.. audit-event:: cpython.run_stdin "" "" -+ -+.. versionchanged:: 3.9.20 (also 3.8.20) -+ Emits audit events. - - .. We use the "rubric" directive here to avoid creating - the "Reference" subsection in the TOC. -diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst -index f4b9f525096..878c09bb040 100644 ---- a/Doc/library/email.errors.rst -+++ b/Doc/library/email.errors.rst -@@ -59,6 +59,12 @@ - :class:`~email.mime.image.MIMEImage`). - - -+.. exception:: HeaderWriteError() -+ -+ Raised when an error occurs when the :mod:`~email.generator` outputs -+ headers. -+ -+ - Here is the list of the defects that the :class:`~email.parser.FeedParser` - can find while parsing messages. Note that the defects are added to the message - where the problem was found, so for example, if a message nested inside a -diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst -index bf53b9520fc..57a75ce4529 100644 ---- a/Doc/library/email.policy.rst -+++ b/Doc/library/email.policy.rst -@@ -229,6 +229,24 @@ - - .. versionadded:: 3.6 - -+ -+ .. attribute:: verify_generated_headers -+ -+ If ``True`` (the default), the generator will raise -+ :exc:`~email.errors.HeaderWriteError` instead of writing a header -+ that is improperly folded or delimited, such that it would -+ be parsed as multiple headers or joined with adjacent data. -+ Such headers can be generated by custom header classes or bugs -+ in the ``email`` module. -+ -+ As it's a security feature, this defaults to ``True`` even in the -+ :class:`~email.policy.Compat32` policy. -+ For backwards compatible, but unsafe, behavior, it must be set to -+ ``False`` explicitly. -+ -+ .. versionadded:: 3.9.20 -+ -+ - The following :class:`Policy` method is intended to be called by code using - the email library to create policy instances with custom settings: - -diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst -index 4d0e920eb0a..89f6231ba6d 100644 ---- a/Doc/library/email.utils.rst -+++ b/Doc/library/email.utils.rst -@@ -60,13 +60,18 @@ - begins with angle brackets, they are stripped off. - - --.. function:: parseaddr(address) -+.. function:: parseaddr(address, *, strict=True) - - Parse address -- which should be the value of some address-containing field such - as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and - *email address* parts. Returns a tuple of that information, unless the parse - fails, in which case a 2-tuple of ``('', '')`` is returned. - -+ If *strict* is true, use a strict parser which rejects malformed inputs. -+ -+ .. versionchanged:: 3.9.20 -+ Add *strict* optional parameter and reject malformed inputs by default. -+ - - .. function:: formataddr(pair, charset='utf-8') - -@@ -84,12 +89,15 @@ - Added the *charset* option. - - --.. function:: getaddresses(fieldvalues) -+.. function:: getaddresses(fieldvalues, *, strict=True) - - This method returns a list of 2-tuples of the form returned by ``parseaddr()``. - *fieldvalues* is a sequence of header field values as might be returned by -- :meth:`Message.get_all `. Here's a simple -- example that gets all the recipients of a message:: -+ :meth:`Message.get_all `. -+ -+ If *strict* is true, use a strict parser which rejects malformed inputs. -+ -+ Here's a simple example that gets all the recipients of a message:: - - from email.utils import getaddresses - -@@ -99,6 +107,9 @@ - resent_ccs = msg.get_all('resent-cc', []) - all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) - -+ .. versionchanged:: 3.9.20 -+ Add *strict* optional parameter and reject malformed inputs by default. -+ - - .. function:: parsedate(date) - diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index 97564815310..c4a15bce0e6 100644 --- a/Doc/library/importlib.rst @@ -202,70 +72,8 @@ index 97564815310..c4a15bce0e6 100644 :mod:`importlib.util` -- Utility code for importers --------------------------------------------------- -diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst -index 9c2dff55703..f9c1ebf3f3d 100644 ---- a/Doc/library/ipaddress.rst -+++ b/Doc/library/ipaddress.rst -@@ -188,18 +188,53 @@ - - .. attribute:: is_private - -- ``True`` if the address is allocated for private networks. See -+ ``True`` if the address is defined as not globally reachable by - iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ -- (for IPv6). -+ (for IPv6) with the following exceptions: -+ -+ * ``is_private`` is ``False`` for the shared address space (``100.64.0.0/10``) -+ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the -+ semantics of the underlying IPv4 addresses and the following condition holds -+ (see :attr:`IPv6Address.ipv4_mapped`):: -+ -+ address.is_private == address.ipv4_mapped.is_private -+ -+ ``is_private`` has value opposite to :attr:`is_global`, except for the shared address space -+ (``100.64.0.0/10`` range) where they are both ``False``. -+ -+ .. versionchanged:: 3.9.20 -+ -+ Fixed some false positives and false negatives. -+ -+ * ``192.0.0.0/24`` is considered private with the exception of ``192.0.0.9/32`` and -+ ``192.0.0.10/32`` (previously: only the ``192.0.0.0/29`` sub-range was considered private). -+ * ``64:ff9b:1::/48`` is considered private. -+ * ``2002::/16`` is considered private. -+ * There are exceptions within ``2001::/23`` (otherwise considered private): ``2001:1::1/128``, -+ ``2001:1::2/128``, ``2001:3::/32``, ``2001:4:112::/48``, ``2001:20::/28``, ``2001:30::/28``. -+ The exceptions are not considered private. - - .. attribute:: is_global - -- ``True`` if the address is allocated for public networks. See -+ ``True`` if the address is defined as globally reachable by - iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ -- (for IPv6). -+ (for IPv6) with the following exception: -+ -+ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the -+ semantics of the underlying IPv4 addresses and the following condition holds -+ (see :attr:`IPv6Address.ipv4_mapped`):: -+ -+ address.is_global == address.ipv4_mapped.is_global -+ -+ ``is_global`` has value opposite to :attr:`is_private`, except for the shared address space -+ (``100.64.0.0/10`` range) where they are both ``False``. - - .. versionadded:: 3.4 - -+ .. versionchanged:: 3.9.20 -+ -+ Fixed some false positives and false negatives, see :attr:`is_private` for details. -+ - .. attribute:: is_unspecified - - ``True`` if the address is unspecified. See :RFC:`5735` (for IPv4) diff --git a/Doc/library/os.rst b/Doc/library/os.rst -index 35a7e1e96d3..a3bb0a48e98 100644 +index 1d10ae7bbce..a3bb0a48e98 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -638,6 +638,11 @@ @@ -280,27 +88,6 @@ index 35a7e1e96d3..a3bb0a48e98 100644 .. availability:: recent flavors of Unix. .. versionchanged:: 3.3 -@@ -1929,6 +1934,10 @@ - platform-dependent. On some platforms, they are ignored and you should call - :func:`chmod` explicitly to set them. - -+ On Windows, a *mode* of ``0o700`` is specifically handled to apply access -+ control to the new directory such that only the current user and -+ administrators have access. Other values of *mode* are ignored. -+ - This function can also support :ref:`paths relative to directory descriptors - `. - -@@ -1943,6 +1952,9 @@ - .. versionchanged:: 3.6 - Accepts a :term:`path-like object`. - -+ .. versionchanged:: 3.9.20 -+ Windows now handles a *mode* of ``0o700``. -+ - - .. function:: makedirs(name, mode=0o777, exist_ok=False) - diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst index 722a5b59e3e..4adccec2371 100644 --- a/Doc/library/platform.rst @@ -357,36 +144,6 @@ index 722a5b59e3e..4adccec2371 100644 Unix Platforms -------------- -diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst -index 370ea5839b6..70cf95edcc3 100644 ---- a/Doc/library/subprocess.rst -+++ b/Doc/library/subprocess.rst -@@ -713,8 +713,8 @@ - Security Considerations - ----------------------- - --Unlike some other popen functions, this implementation will never --implicitly call a system shell. This means that all characters, -+Unlike some other popen functions, this library will not -+implicitly choose to call a system shell. This means that all characters, - including shell metacharacters, can safely be passed to child processes. - If the shell is invoked explicitly, via ``shell=True``, it is the application's - responsibility to ensure that all whitespace and metacharacters are -@@ -726,6 +726,14 @@ - used to properly escape whitespace and shell metacharacters in strings - that are going to be used to construct shell commands. - -+On Windows, batch files (:file:`*.bat` or :file:`*.cmd`) may be launched by the -+operating system in a system shell regardless of the arguments passed to this -+library. This could result in arguments being parsed according to shell rules, -+but without any escaping added by Python. If you are intentionally launching a -+batch file with arguments from untrusted sources, consider passing -+``shell=True`` to allow Python to escape special characters. See :gh:`114539` -+for additional discussion. -+ - - Popen Objects - ------------- diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 9de30a182f4..8c5400e1143 100644 --- a/Doc/library/urllib.parse.rst @@ -453,41 +210,6 @@ index 0ddba0dec84..210690fba2d 100644 Here are some simple examples:: url = 'https://docs.python.org/' -diff --git a/Doc/tools/susp-ignored.csv b/Doc/tools/susp-ignored.csv -index 3eb3d7954f8..de91a50bad0 100644 ---- a/Doc/tools/susp-ignored.csv -+++ b/Doc/tools/susp-ignored.csv -@@ -169,6 +169,14 @@ - library/ipaddress,,::,2001:db00::0/24 - library/ipaddress,,:db00,2001:db00::0/ffff:ff00:: - library/ipaddress,,::,2001:db00::0/ffff:ff00:: -+library/ipaddress,,:ff9b,64:ff9b:1::/48 -+library/ipaddress,,::,64:ff9b:1::/48 -+library/ipaddress,,::,2001:: -+library/ipaddress,,::,2001:1:: -+library/ipaddress,,::,2001:3:: -+library/ipaddress,,::,2001:4:112:: -+library/ipaddress,,::,2001:20:: -+library/ipaddress,,::,2001:30:: - library/itertools,,:step,elements from seq[start:stop:step] - library/itertools,,:stop,elements from seq[start:stop:step] - library/itertools,,::,kernel = tuple(kernel)[::-1] -diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst -index 66d8d57aadf..5a62bdd950e 100644 ---- a/Doc/using/cmdline.rst -+++ b/Doc/using/cmdline.rst -@@ -610,6 +610,11 @@ - This variable can also be modified by Python code using :data:`os.environ` - to force inspect mode on program termination. - -+ .. audit-event:: cpython.run_stdin "" "" -+ -+ .. versionchanged:: 3.9.20 (also 3.8.20) -+ Emits audit events. -+ - - .. envvar:: PYTHONUNBUFFERED - diff --git a/Doc/using/mac.rst b/Doc/using/mac.rst index f7db038430b..e218fe31160 100644 --- a/Doc/using/mac.rst @@ -521,93 +243,6 @@ index f7db038430b..e218fe31160 100644 Other Resources =============== -diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst -index 0064e074a3a..f4c3876c8ff 100644 ---- a/Doc/whatsnew/3.9.rst -+++ b/Doc/whatsnew/3.9.rst -@@ -613,6 +613,13 @@ - convert a wait status to an exit code. - (Contributed by Victor Stinner in :issue:`40094`.) - -+As of 3.9.20, :func:`os.mkdir` and :func:`os.makedirs` on Windows now support -+passing a *mode* value of ``0o700`` to apply access control to the new -+directory. This implicitly affects :func:`tempfile.mkdtemp` and is a -+mitigation for CVE-2024-4030. Other values for *mode* continue to be -+ignored. -+(Contributed by Steve Dower in :gh:`118486`.) -+ - pathlib - ------- - -@@ -704,6 +711,14 @@ - ``stderr`` defaults to always being line-buffered. - (Contributed by Jendrik Seipp in :issue:`13601`.) - -+tempfile -+-------- -+ -+As of 3.9.20 on Windows, the default mode ``0o700`` used by -+:func:`tempfile.mkdtemp` now limits access to the new directory due to -+changes to :func:`os.mkdir`. This is a mitigation for CVE-2024-4030. -+(Contributed by Steve Dower in :gh:`118486`.) -+ - tracemalloc - ----------- - -@@ -1616,3 +1631,34 @@ - :exc:`DeprecationWarning`. - In Python 3.14, the default will switch to ``'data'``. - (Contributed by Petr Viktorin in :pep:`706`.) -+ -+Notable changes in 3.9.20 -+========================= -+ -+ipaddress -+--------- -+ -+* Fixed ``is_global`` and ``is_private`` behavior in ``IPv4Address``, -+ ``IPv6Address``, ``IPv4Network`` and ``IPv6Network``. -+ -+email -+----- -+ -+* Headers with embedded newlines are now quoted on output. -+ -+ The :mod:`~email.generator` will now refuse to serialize (write) headers -+ that are improperly folded or delimited, such that they would be parsed as -+ multiple headers or joined with adjacent data. -+ If you need to turn this safety feature off, -+ set :attr:`~email.policy.Policy.verify_generated_headers`. -+ (Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.) -+ -+* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return -+ ``('', '')`` 2-tuples in more situations where invalid email addresses are -+ encountered, instead of potentially inaccurate values. -+ An optional *strict* parameter was added to these two functions: -+ use ``strict=False`` to get the old behavior, accepting malformed inputs. -+ ``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to -+ check if the *strict* paramater is available. -+ (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve -+ the CVE-2023-27043 fix.) -diff --git a/Include/patchlevel.h b/Include/patchlevel.h -index 5b5ef557639..7ed78c7b132 100644 ---- a/Include/patchlevel.h -+++ b/Include/patchlevel.h -@@ -18,12 +18,12 @@ - /*--start constants--*/ - #define PY_MAJOR_VERSION 3 - #define PY_MINOR_VERSION 9 --#define PY_MICRO_VERSION 19 -+#define PY_MICRO_VERSION 20 - #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL - #define PY_RELEASE_SERIAL 0 - - /* Version as a string */ --#define PY_VERSION "3.9.19" -+#define PY_VERSION "3.9.20" - /*--end constants--*/ - - /* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. --- /dev/null +++ b/Lib/_ios_support.py @@ -0,0 +1,71 @@ @@ -682,44 +317,6 @@ index 5b5ef557639..7ed78c7b132 100644 + model = objc.objc_msgSend(device_model, SEL_UTF8String).decode() + + return system, release, model, is_simulator -diff --git a/Lib/asyncio/__main__.py b/Lib/asyncio/__main__.py -index 18bb87a5bc4..73330f4ac3f 100644 ---- a/Lib/asyncio/__main__.py -+++ b/Lib/asyncio/__main__.py -@@ -90,6 +90,8 @@ - - - if __name__ == '__main__': -+ sys.audit("cpython.run_stdin") -+ - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - -diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py -index da81ab435b9..b31270e0e62 100644 ---- a/Lib/asyncio/windows_events.py -+++ b/Lib/asyncio/windows_events.py -@@ -323,13 +323,13 @@ - if self._self_reading_future is not None: - ov = self._self_reading_future._ov - self._self_reading_future.cancel() -- # self_reading_future was just cancelled so if it hasn't been -- # finished yet, it never will be (it's possible that it has -- # already finished and its callback is waiting in the queue, -- # where it could still happen if the event loop is restarted). -- # Unregister it otherwise IocpProactor.close will wait for it -- # forever -- if ov is not None: -+ # self_reading_future always uses IOCP, so even though it's -+ # been cancelled, we need to make sure that the IOCP message -+ # is received so that the kernel is not holding on to the -+ # memory, possibly causing memory corruption later. Only -+ # unregister it if IO is complete in all respects. Otherwise -+ # we need another _poll() later to complete the IO. -+ if ov is not None and not ov.pending: - self._proactor._unregister(ov) - self._self_reading_future = None - diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index 4afa4ebd422..dca2081bea4 100644 --- a/Lib/ctypes/__init__.py @@ -886,349 +483,6 @@ index 4b002ecef1d..75465324735 100644 try: import pwd os.environ['HOME'] = pwd.getpwuid(os.getuid())[5] -diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py -index 8a8fb8bc42a..e394cfd2e19 100644 ---- a/Lib/email/_header_value_parser.py -+++ b/Lib/email/_header_value_parser.py -@@ -92,6 +92,8 @@ - ASPECIALS = TSPECIALS | set("*'%") - ATTRIBUTE_ENDS = ASPECIALS | WSP - EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') -+NLSET = {'\n', '\r'} -+SPECIALSNL = SPECIALS | NLSET - - def quote_string(value): - return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' -@@ -2778,9 +2780,13 @@ - wrap_as_ew_blocked -= 1 - continue - tstr = str(part) -- if part.token_type == 'ptext' and set(tstr) & SPECIALS: -- # Encode if tstr contains special characters. -- want_encoding = True -+ if not want_encoding: -+ if part.token_type == 'ptext': -+ # Encode if tstr contains special characters. -+ want_encoding = not SPECIALSNL.isdisjoint(tstr) -+ else: -+ # Encode if tstr contains newlines. -+ want_encoding = not NLSET.isdisjoint(tstr) - try: - tstr.encode(encoding) - charset = encoding -diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py -index c9cbadd2a80..d1f48211f90 100644 ---- a/Lib/email/_policybase.py -+++ b/Lib/email/_policybase.py -@@ -157,6 +157,13 @@ - message_factory -- the class to use to create new message objects. - If the value is None, the default is Message. - -+ verify_generated_headers -+ -- if true, the generator verifies that each header -+ they are properly folded, so that a parser won't -+ treat it as multiple headers, start-of-body, or -+ part of another header. -+ This is a check against custom Header & fold() -+ implementations. - """ - - raise_on_defect = False -@@ -165,6 +172,7 @@ - max_line_length = 78 - mangle_from_ = False - message_factory = None -+ verify_generated_headers = True - - def handle_defect(self, obj, defect): - """Based on policy, either raise defect or call register_defect. -diff --git a/Lib/email/errors.py b/Lib/email/errors.py -index d28a6800104..1a0d5c63e60 100644 ---- a/Lib/email/errors.py -+++ b/Lib/email/errors.py -@@ -29,6 +29,10 @@ - """An illegal charset was given.""" - - -+class HeaderWriteError(MessageError): -+ """Error while writing headers.""" -+ -+ - # These are parsing defects which the parser was able to work around. - class MessageDefect(ValueError): - """Base class for a message defect.""" -diff --git a/Lib/email/generator.py b/Lib/email/generator.py -index c9b121624e0..89224ae41cb 100644 ---- a/Lib/email/generator.py -+++ b/Lib/email/generator.py -@@ -14,12 +14,14 @@ - from copy import deepcopy - from io import StringIO, BytesIO - from email.utils import _has_surrogates -+from email.errors import HeaderWriteError - - UNDERSCORE = '_' - NL = '\n' # XXX: no longer used by the code below. - - NLCRE = re.compile(r'\r\n|\r|\n') - fcre = re.compile(r'^From ', re.MULTILINE) -+NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') - - - -@@ -223,7 +225,16 @@ - - def _write_headers(self, msg): - for h, v in msg.raw_items(): -- self.write(self.policy.fold(h, v)) -+ folded = self.policy.fold(h, v) -+ if self.policy.verify_generated_headers: -+ linesep = self.policy.linesep -+ if not folded.endswith(self.policy.linesep): -+ raise HeaderWriteError( -+ f'folded header does not end with {linesep!r}: {folded!r}') -+ if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)): -+ raise HeaderWriteError( -+ f'folded header contains newline: {folded!r}') -+ self.write(folded) - # A blank line always separates headers from body - self.write(self._NL) - -diff --git a/Lib/email/utils.py b/Lib/email/utils.py -index 48d30160aa6..7ca7a7c8867 100644 ---- a/Lib/email/utils.py -+++ b/Lib/email/utils.py -@@ -48,6 +48,7 @@ - specialsre = re.compile(r'[][\\()<>@,:;".]') - escapesre = re.compile(r'[\\"]') - -+ - def _has_surrogates(s): - """Return True if s contains surrogate-escaped binary data.""" - # This check is based on the fact that unless there are surrogates, utf8 -@@ -106,12 +107,127 @@ - return address - - -+def _iter_escaped_chars(addr): -+ pos = 0 -+ escape = False -+ for pos, ch in enumerate(addr): -+ if escape: -+ yield (pos, '\\' + ch) -+ escape = False -+ elif ch == '\\': -+ escape = True -+ else: -+ yield (pos, ch) -+ if escape: -+ yield (pos, '\\') -+ -+ -+def _strip_quoted_realnames(addr): -+ """Strip real names between quotes.""" -+ if '"' not in addr: -+ # Fast path -+ return addr -+ -+ start = 0 -+ open_pos = None -+ result = [] -+ for pos, ch in _iter_escaped_chars(addr): -+ if ch == '"': -+ if open_pos is None: -+ open_pos = pos -+ else: -+ if start != open_pos: -+ result.append(addr[start:open_pos]) -+ start = pos + 1 -+ open_pos = None -+ -+ if start < len(addr): -+ result.append(addr[start:]) -+ -+ return ''.join(result) - --def getaddresses(fieldvalues): -- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" -- all = COMMASPACE.join(str(v) for v in fieldvalues) -- a = _AddressList(all) -- return a.addresslist -+ -+supports_strict_parsing = True -+ -+def getaddresses(fieldvalues, *, strict=True): -+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -+ -+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in -+ its place. -+ -+ If strict is true, use a strict parser which rejects malformed inputs. -+ """ -+ -+ # If strict is true, if the resulting list of parsed addresses is greater -+ # than the number of fieldvalues in the input list, a parsing error has -+ # occurred and consequently a list containing a single empty 2-tuple [('', -+ # '')] is returned in its place. This is done to avoid invalid output. -+ # -+ # Malformed input: getaddresses(['alice@example.com ']) -+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] -+ # Safe output: [('', '')] -+ -+ if not strict: -+ all = COMMASPACE.join(str(v) for v in fieldvalues) -+ a = _AddressList(all) -+ return a.addresslist -+ -+ fieldvalues = [str(v) for v in fieldvalues] -+ fieldvalues = _pre_parse_validation(fieldvalues) -+ addr = COMMASPACE.join(fieldvalues) -+ a = _AddressList(addr) -+ result = _post_parse_validation(a.addresslist) -+ -+ # Treat output as invalid if the number of addresses is not equal to the -+ # expected number of addresses. -+ n = 0 -+ for v in fieldvalues: -+ # When a comma is used in the Real Name part it is not a deliminator. -+ # So strip those out before counting the commas. -+ v = _strip_quoted_realnames(v) -+ # Expected number of addresses: 1 + number of commas -+ n += 1 + v.count(',') -+ if len(result) != n: -+ return [('', '')] -+ -+ return result -+ -+ -+def _check_parenthesis(addr): -+ # Ignore parenthesis in quoted real names. -+ addr = _strip_quoted_realnames(addr) -+ -+ opens = 0 -+ for pos, ch in _iter_escaped_chars(addr): -+ if ch == '(': -+ opens += 1 -+ elif ch == ')': -+ opens -= 1 -+ if opens < 0: -+ return False -+ return (opens == 0) -+ -+ -+def _pre_parse_validation(email_header_fields): -+ accepted_values = [] -+ for v in email_header_fields: -+ if not _check_parenthesis(v): -+ v = "('', '')" -+ accepted_values.append(v) -+ -+ return accepted_values -+ -+ -+def _post_parse_validation(parsed_email_header_tuples): -+ accepted_values = [] -+ # The parser would have parsed a correctly formatted domain-literal -+ # The existence of an [ after parsing indicates a parsing failure -+ for v in parsed_email_header_tuples: -+ if '[' in v[1]: -+ v = ('', '') -+ accepted_values.append(v) -+ -+ return accepted_values - - - def _format_timetuple_and_zone(timetuple, zone): -@@ -202,16 +318,33 @@ - tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) - - --def parseaddr(addr): -+def parseaddr(addr, *, strict=True): - """ - Parse addr into its constituent realname and email address parts. - - Return a tuple of realname and email address, unless the parse fails, in - which case return a 2-tuple of ('', ''). -+ -+ If strict is True, use a strict parser which rejects malformed inputs. - """ -- addrs = _AddressList(addr).addresslist -- if not addrs: -- return '', '' -+ if not strict: -+ addrs = _AddressList(addr).addresslist -+ if not addrs: -+ return ('', '') -+ return addrs[0] -+ -+ if isinstance(addr, list): -+ addr = addr[0] -+ -+ if not isinstance(addr, str): -+ return ('', '') -+ -+ addr = _pre_parse_validation([addr])[0] -+ addrs = _post_parse_validation(_AddressList(addr).addresslist) -+ -+ if not addrs or len(addrs) > 1: -+ return ('', '') -+ - return addrs[0] - - -diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py -index 35ac2dc6ae2..2c1f021d0ab 100644 ---- a/Lib/http/cookies.py -+++ b/Lib/http/cookies.py -@@ -184,8 +184,13 @@ - return '"' + str.translate(_Translator) + '"' - - --_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") --_QuotePatt = re.compile(r"[\\].") -+_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub -+ -+def _unquote_replace(m): -+ if m[1]: -+ return chr(int(m[1], 8)) -+ else: -+ return m[2] - - def _unquote(str): - # If there aren't any doublequotes, -@@ -205,30 +210,7 @@ - # \012 --> \n - # \" --> " - # -- i = 0 -- n = len(str) -- res = [] -- while 0 <= i < n: -- o_match = _OctalPatt.search(str, i) -- q_match = _QuotePatt.search(str, i) -- if not o_match and not q_match: # Neither matched -- res.append(str[i:]) -- break -- # else: -- j = k = -1 -- if o_match: -- j = o_match.start(0) -- if q_match: -- k = q_match.start(0) -- if q_match and (not o_match or k < j): # QuotePatt matched -- res.append(str[i:k]) -- res.append(str[k+1]) -- i = k + 2 -- else: # OctalPatt matched -- res.append(str[i:j]) -- res.append(chr(int(str[j+1:j+4], 8))) -- i = j + 4 -- return _nulljoin(res) -+ return _unquote_sub(_unquote_replace, str) - - # The _getdate() routine is used to set the expiration time in the cookie's HTTP - # header. By default, _getdate() returns the current time in the appropriate diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index f3828b10e1c..665658891df 100644 --- a/Lib/importlib/_bootstrap_external.py @@ -1370,172 +624,6 @@ index 6f91435541b..2cc9d10aac3 100644 raise OSError('source code not available') module = getmodule(object, file) -diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py -index 25f373a06a2..9b35340d9ac 100644 ---- a/Lib/ipaddress.py -+++ b/Lib/ipaddress.py -@@ -1322,18 +1322,41 @@ - @property - @functools.lru_cache() - def is_private(self): -- """Test if this address is allocated for private networks. -+ """``True`` if the address is defined as not globally reachable by -+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ -+ (for IPv6) with the following exceptions: - -- Returns: -- A boolean, True if the address is reserved per -- iana-ipv4-special-registry. -+ * ``is_private`` is ``False`` for ``100.64.0.0/10`` -+ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the -+ semantics of the underlying IPv4 addresses and the following condition holds -+ (see :attr:`IPv6Address.ipv4_mapped`):: -+ -+ address.is_private == address.ipv4_mapped.is_private - -+ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` -+ IPv4 range where they are both ``False``. - """ -- return any(self in net for net in self._constants._private_networks) -+ return ( -+ any(self in net for net in self._constants._private_networks) -+ and all(self not in net for net in self._constants._private_networks_exceptions) -+ ) - - @property - @functools.lru_cache() - def is_global(self): -+ """``True`` if the address is defined as globally reachable by -+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ -+ (for IPv6) with the following exception: -+ -+ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the -+ semantics of the underlying IPv4 addresses and the following condition holds -+ (see :attr:`IPv6Address.ipv4_mapped`):: -+ -+ address.is_global == address.ipv4_mapped.is_global -+ -+ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` -+ IPv4 range where they are both ``False``. -+ """ - return self not in self._constants._public_network and not self.is_private - - @property -@@ -1537,13 +1560,15 @@ - - _public_network = IPv4Network('100.64.0.0/10') - -+ # Not globally reachable address blocks listed on -+ # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml - _private_networks = [ - IPv4Network('0.0.0.0/8'), - IPv4Network('10.0.0.0/8'), - IPv4Network('127.0.0.0/8'), - IPv4Network('169.254.0.0/16'), - IPv4Network('172.16.0.0/12'), -- IPv4Network('192.0.0.0/29'), -+ IPv4Network('192.0.0.0/24'), - IPv4Network('192.0.0.170/31'), - IPv4Network('192.0.2.0/24'), - IPv4Network('192.168.0.0/16'), -@@ -1554,6 +1579,11 @@ - IPv4Network('255.255.255.255/32'), - ] - -+ _private_networks_exceptions = [ -+ IPv4Network('192.0.0.9/32'), -+ IPv4Network('192.0.0.10/32'), -+ ] -+ - _reserved_network = IPv4Network('240.0.0.0/4') - - _unspecified_address = IPv4Address('0.0.0.0') -@@ -1995,23 +2025,42 @@ - @property - @functools.lru_cache() - def is_private(self): -- """Test if this address is allocated for private networks. -+ """``True`` if the address is defined as not globally reachable by -+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ -+ (for IPv6) with the following exceptions: - -- Returns: -- A boolean, True if the address is reserved per -- iana-ipv6-special-registry. -+ * ``is_private`` is ``False`` for ``100.64.0.0/10`` -+ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the -+ semantics of the underlying IPv4 addresses and the following condition holds -+ (see :attr:`IPv6Address.ipv4_mapped`):: -+ -+ address.is_private == address.ipv4_mapped.is_private - -+ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` -+ IPv4 range where they are both ``False``. - """ -- return any(self in net for net in self._constants._private_networks) -+ ipv4_mapped = self.ipv4_mapped -+ if ipv4_mapped is not None: -+ return ipv4_mapped.is_private -+ return ( -+ any(self in net for net in self._constants._private_networks) -+ and all(self not in net for net in self._constants._private_networks_exceptions) -+ ) - - @property - def is_global(self): -- """Test if this address is allocated for public networks. -+ """``True`` if the address is defined as globally reachable by -+ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ -+ (for IPv6) with the following exception: - -- Returns: -- A boolean, true if the address is not reserved per -- iana-ipv6-special-registry. -+ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the -+ semantics of the underlying IPv4 addresses and the following condition holds -+ (see :attr:`IPv6Address.ipv4_mapped`):: -+ -+ address.is_global == address.ipv4_mapped.is_global - -+ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` -+ IPv4 range where they are both ``False``. - """ - return not self.is_private - -@@ -2252,19 +2301,31 @@ - - _multicast_network = IPv6Network('ff00::/8') - -+ # Not globally reachable address blocks listed on -+ # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml - _private_networks = [ - IPv6Network('::1/128'), - IPv6Network('::/128'), - IPv6Network('::ffff:0:0/96'), -+ IPv6Network('64:ff9b:1::/48'), - IPv6Network('100::/64'), - IPv6Network('2001::/23'), -- IPv6Network('2001:2::/48'), - IPv6Network('2001:db8::/32'), -- IPv6Network('2001:10::/28'), -+ # IANA says N/A, let's consider it not globally reachable to be safe -+ IPv6Network('2002::/16'), - IPv6Network('fc00::/7'), - IPv6Network('fe80::/10'), - ] - -+ _private_networks_exceptions = [ -+ IPv6Network('2001:1::1/128'), -+ IPv6Network('2001:1::2/128'), -+ IPv6Network('2001:3::/32'), -+ IPv6Network('2001:4:112::/48'), -+ IPv6Network('2001:20::/28'), -+ IPv6Network('2001:30::/28'), -+ ] -+ - _reserved_networks = [ - IPv6Network('::/8'), IPv6Network('100::/8'), - IPv6Network('200::/7'), IPv6Network('400::/6'), diff --git a/Lib/lib2to3/tests/test_parser.py b/Lib/lib2to3/tests/test_parser.py index 3884872437c..1bebf978595 100644 --- a/Lib/lib2to3/tests/test_parser.py @@ -1747,130 +835,6 @@ index 54ffc4fdc03..0710e561c54 100644 def joinuser(*args): return os.path.expanduser(os.path.join(*args)) -diff --git a/Lib/socket.py b/Lib/socket.py -index 46fc49ca323..28360985450 100755 ---- a/Lib/socket.py -+++ b/Lib/socket.py -@@ -588,16 +588,65 @@ - return socket(0, 0, 0, info) - __all__.append("fromshare") - --if hasattr(_socket, "socketpair"): -+# Origin: https://gist.github.com/4325783, by Geert Jansen. Public domain. -+# This is used if _socket doesn't natively provide socketpair. It's -+# always defined so that it can be patched in for testing purposes. -+def _fallback_socketpair(family=AF_INET, type=SOCK_STREAM, proto=0): -+ if family == AF_INET: -+ host = _LOCALHOST -+ elif family == AF_INET6: -+ host = _LOCALHOST_V6 -+ else: -+ raise ValueError("Only AF_INET and AF_INET6 socket address families " -+ "are supported") -+ if type != SOCK_STREAM: -+ raise ValueError("Only SOCK_STREAM socket type is supported") -+ if proto != 0: -+ raise ValueError("Only protocol zero is supported") -+ -+ # We create a connected TCP socket. Note the trick with -+ # setblocking(False) that prevents us from having to create a thread. -+ lsock = socket(family, type, proto) -+ try: -+ lsock.bind((host, 0)) -+ lsock.listen() -+ # On IPv6, ignore flow_info and scope_id -+ addr, port = lsock.getsockname()[:2] -+ csock = socket(family, type, proto) -+ try: -+ csock.setblocking(False) -+ try: -+ csock.connect((addr, port)) -+ except (BlockingIOError, InterruptedError): -+ pass -+ csock.setblocking(True) -+ ssock, _ = lsock.accept() -+ except: -+ csock.close() -+ raise -+ finally: -+ lsock.close() - -- def socketpair(family=None, type=SOCK_STREAM, proto=0): -- """socketpair([family[, type[, proto]]]) -> (socket object, socket object) -+ # Authenticating avoids using a connection from something else -+ # able to connect to {host}:{port} instead of us. -+ # We expect only AF_INET and AF_INET6 families. -+ try: -+ if ( -+ ssock.getsockname() != csock.getpeername() -+ or csock.getsockname() != ssock.getpeername() -+ ): -+ raise ConnectionError("Unexpected peer connection") -+ except: -+ # getsockname() and getpeername() can fail -+ # if either socket isn't connected. -+ ssock.close() -+ csock.close() -+ raise - -- Create a pair of socket objects from the sockets returned by the platform -- socketpair() function. -- The arguments are the same as for socket() except the default family is -- AF_UNIX if defined on the platform; otherwise, the default is AF_INET. -- """ -+ return (ssock, csock) -+ -+if hasattr(_socket, "socketpair"): -+ def socketpair(family=None, type=SOCK_STREAM, proto=0): - if family is None: - try: - family = AF_UNIX -@@ -609,44 +658,7 @@ - return a, b - - else: -- -- # Origin: https://gist.github.com/4325783, by Geert Jansen. Public domain. -- def socketpair(family=AF_INET, type=SOCK_STREAM, proto=0): -- if family == AF_INET: -- host = _LOCALHOST -- elif family == AF_INET6: -- host = _LOCALHOST_V6 -- else: -- raise ValueError("Only AF_INET and AF_INET6 socket address families " -- "are supported") -- if type != SOCK_STREAM: -- raise ValueError("Only SOCK_STREAM socket type is supported") -- if proto != 0: -- raise ValueError("Only protocol zero is supported") -- -- # We create a connected TCP socket. Note the trick with -- # setblocking(False) that prevents us from having to create a thread. -- lsock = socket(family, type, proto) -- try: -- lsock.bind((host, 0)) -- lsock.listen() -- # On IPv6, ignore flow_info and scope_id -- addr, port = lsock.getsockname()[:2] -- csock = socket(family, type, proto) -- try: -- csock.setblocking(False) -- try: -- csock.connect((addr, port)) -- except (BlockingIOError, InterruptedError): -- pass -- csock.setblocking(True) -- ssock, _ = lsock.accept() -- except: -- csock.close() -- raise -- finally: -- lsock.close() -- return (ssock, csock) -+ socketpair = _fallback_socketpair - __all__.append("socketpair") - - socketpair.__doc__ = """socketpair([family[, type[, proto]]]) -> (socket object, socket object) diff --git a/Lib/sqlite3/test/dbapi.py b/Lib/sqlite3/test/dbapi.py index 053543c0536..51ef3fe0cd8 100644 --- a/Lib/sqlite3/test/dbapi.py @@ -2043,168 +1007,6 @@ index e3f79bfde52..8d034fe69a0 100644 return "%s-%s-%s" % (osname, release, machine) -diff --git a/Lib/tarfile.py b/Lib/tarfile.py -index 7a6158c2eb9..d75ba50b667 100755 ---- a/Lib/tarfile.py -+++ b/Lib/tarfile.py -@@ -840,6 +840,9 @@ - # Sentinel for replace() defaults, meaning "don't change the attribute" - _KEEP = object() - -+# Header length is digits followed by a space. -+_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") -+ - class TarInfo(object): - """Informational class which holds the details about an - archive member given by a tar header block. -@@ -1399,41 +1402,59 @@ - else: - pax_headers = tarfile.pax_headers.copy() - -- # Check if the pax header contains a hdrcharset field. This tells us -- # the encoding of the path, linkpath, uname and gname fields. Normally, -- # these fields are UTF-8 encoded but since POSIX.1-2008 tar -- # implementations are allowed to store them as raw binary strings if -- # the translation to UTF-8 fails. -- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) -- if match is not None: -- pax_headers["hdrcharset"] = match.group(1).decode("utf-8") -- -- # For the time being, we don't care about anything other than "BINARY". -- # The only other value that is currently allowed by the standard is -- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. -- hdrcharset = pax_headers.get("hdrcharset") -- if hdrcharset == "BINARY": -- encoding = tarfile.encoding -- else: -- encoding = "utf-8" -- - # Parse pax header information. A record looks like that: - # "%d %s=%s\n" % (length, keyword, value). length is the size - # of the complete record including the length field itself and -- # the newline. keyword and value are both UTF-8 encoded strings. -- regex = re.compile(br"(\d+) ([^=]+)=") -+ # the newline. - pos = 0 -- while True: -- match = regex.match(buf, pos) -- if not match: -- break -+ encoding = None -+ raw_headers = [] -+ while len(buf) > pos and buf[pos] != 0x00: -+ if not (match := _header_length_prefix_re.match(buf, pos)): -+ raise InvalidHeaderError("invalid header") -+ try: -+ length = int(match.group(1)) -+ except ValueError: -+ raise InvalidHeaderError("invalid header") -+ # Headers must be at least 5 bytes, shortest being '5 x=\n'. -+ # Value is allowed to be empty. -+ if length < 5: -+ raise InvalidHeaderError("invalid header") -+ if pos + length > len(buf): -+ raise InvalidHeaderError("invalid header") - -- length, keyword = match.groups() -- length = int(length) -- if length == 0: -+ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header -+ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] -+ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") -+ -+ # Check the framing of the header. The last character must be '\n' (0x0A) -+ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: - raise InvalidHeaderError("invalid header") -- value = buf[match.end(2) + 1:match.start(1) + length - 1] -+ raw_headers.append((length, raw_keyword, raw_value)) -+ -+ # Check if the pax header contains a hdrcharset field. This tells us -+ # the encoding of the path, linkpath, uname and gname fields. Normally, -+ # these fields are UTF-8 encoded but since POSIX.1-2008 tar -+ # implementations are allowed to store them as raw binary strings if -+ # the translation to UTF-8 fails. For the time being, we don't care about -+ # anything other than "BINARY". The only other value that is currently -+ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. -+ # Note that we only follow the initial 'hdrcharset' setting to preserve -+ # the initial behavior of the 'tarfile' module. -+ if raw_keyword == b"hdrcharset" and encoding is None: -+ if raw_value == b"BINARY": -+ encoding = tarfile.encoding -+ else: # This branch ensures only the first 'hdrcharset' header is used. -+ encoding = "utf-8" -+ -+ pos += length - -+ # If no explicit hdrcharset is set, we use UTF-8 as a default. -+ if encoding is None: -+ encoding = "utf-8" -+ -+ # After parsing the raw headers we can decode them to text. -+ for length, raw_keyword, raw_value in raw_headers: - # Normally, we could just use "utf-8" as the encoding and "strict" - # as the error handler, but we better not take the risk. For - # example, GNU tar <= 1.23 is known to store filenames it cannot -@@ -1441,17 +1462,16 @@ - # hdrcharset=BINARY header). - # We first try the strict standard encoding, and if that fails we - # fall back on the user's encoding and error handler. -- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", -+ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", - tarfile.errors) - if keyword in PAX_NAME_FIELDS: -- value = self._decode_pax_field(value, encoding, tarfile.encoding, -+ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, - tarfile.errors) - else: -- value = self._decode_pax_field(value, "utf-8", "utf-8", -+ value = self._decode_pax_field(raw_value, "utf-8", "utf-8", - tarfile.errors) - - pax_headers[keyword] = value -- pos += length - - # Fetch the next header. - try: -@@ -1466,7 +1486,7 @@ - - elif "GNU.sparse.size" in pax_headers: - # GNU extended sparse format version 0.0. -- self._proc_gnusparse_00(next, pax_headers, buf) -+ self._proc_gnusparse_00(next, raw_headers) - - elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": - # GNU extended sparse format version 1.0. -@@ -1488,15 +1508,24 @@ - - return next - -- def _proc_gnusparse_00(self, next, pax_headers, buf): -+ def _proc_gnusparse_00(self, next, raw_headers): - """Process a GNU tar extended sparse header, version 0.0. - """ - offsets = [] -- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): -- offsets.append(int(match.group(1))) - numbytes = [] -- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): -- numbytes.append(int(match.group(1))) -+ for _, keyword, value in raw_headers: -+ if keyword == b"GNU.sparse.offset": -+ try: -+ offsets.append(int(value.decode())) -+ except ValueError: -+ raise InvalidHeaderError("invalid header") -+ -+ elif keyword == b"GNU.sparse.numbytes": -+ try: -+ numbytes.append(int(value.decode())) -+ except ValueError: -+ raise InvalidHeaderError("invalid header") -+ - next.sparse = list(zip(offsets, numbytes)) - - def _proc_gnusparse_01(self, next, pax_headers): diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index aa5fe82afb0..21592d59d4a 100644 --- a/Lib/test/datetimetester.py @@ -2568,88 +1370,6 @@ index 70d306ffe8f..194729ed83f 100644 def test_create_unix_server_ssl_bool(self): coro = self.loop.create_unix_server(lambda: None, path='spam', -diff --git a/Lib/test/test_asyncio/test_windows_events.py b/Lib/test/test_asyncio/test_windows_events.py -index f276cd205a2..a8939d58540 100644 ---- a/Lib/test/test_asyncio/test_windows_events.py -+++ b/Lib/test/test_asyncio/test_windows_events.py -@@ -36,7 +36,23 @@ - self.trans.close() - - --class ProactorLoopCtrlC(test_utils.TestCase): -+class WindowsEventsTestCase(test_utils.TestCase): -+ def _unraisablehook(self, unraisable): -+ # Storing unraisable.object can resurrect an object which is being -+ # finalized. Storing unraisable.exc_value creates a reference cycle. -+ self._unraisable = unraisable -+ print(unraisable) -+ -+ def setUp(self): -+ self._prev_unraisablehook = sys.unraisablehook -+ self._unraisable = None -+ sys.unraisablehook = self._unraisablehook -+ -+ def tearDown(self): -+ sys.unraisablehook = self._prev_unraisablehook -+ self.assertIsNone(self._unraisable) -+ -+class ProactorLoopCtrlC(WindowsEventsTestCase): - - def test_ctrl_c(self): - -@@ -58,7 +74,7 @@ - thread.join() - - --class ProactorMultithreading(test_utils.TestCase): -+class ProactorMultithreading(WindowsEventsTestCase): - def test_run_from_nonmain_thread(self): - finished = False - -@@ -79,7 +95,7 @@ - self.assertTrue(finished) - - --class ProactorTests(test_utils.TestCase): -+class ProactorTests(WindowsEventsTestCase): - - def setUp(self): - super().setUp() -@@ -239,8 +255,32 @@ - self.close_loop(self.loop) - self.assertFalse(self.loop.call_exception_handler.called) - -- --class WinPolicyTests(test_utils.TestCase): -+ def test_loop_restart(self): -+ # We're fishing for the "RuntimeError: <_overlapped.Overlapped object at XXX> -+ # still has pending operation at deallocation, the process may crash" error -+ stop = threading.Event() -+ def threadMain(): -+ while not stop.is_set(): -+ self.loop.call_soon_threadsafe(lambda: None) -+ time.sleep(0.01) -+ thr = threading.Thread(target=threadMain) -+ -+ # In 10 60-second runs of this test prior to the fix: -+ # time in seconds until failure: (none), 15.0, 6.4, (none), 7.6, 8.3, 1.7, 22.2, 23.5, 8.3 -+ # 10 seconds had a 50% failure rate but longer would be more costly -+ end_time = time.time() + 10 # Run for 10 seconds -+ self.loop.call_soon(thr.start) -+ while not self._unraisable: # Stop if we got an unraisable exc -+ self.loop.stop() -+ self.loop.run_forever() -+ if time.time() >= end_time: -+ break -+ -+ stop.set() -+ thr.join() -+ -+ -+class WinPolicyTests(WindowsEventsTestCase): - - def test_selector_win_policy(self): - async def main(): diff --git a/Lib/test/test_asyncio/utils.py b/Lib/test/test_asyncio/utils.py index 2570433e679..d1dd43cad78 100644 --- a/Lib/test/test_asyncio/utils.py @@ -2928,367 +1648,6 @@ index a5f8f6465e8..e5d34358517 100644 def test_all(self): # Run the tester in a sub-process, to make sure there is only one # thread (for reliable signal delivery). -diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py -index 761ea90b78f..0c689643ded 100644 ---- a/Lib/test/test_email/test_email.py -+++ b/Lib/test/test_email/test_email.py -@@ -16,6 +16,7 @@ - - import email - import email.policy -+import email.utils - - from email.charset import Charset - from email.header import Header, decode_header, make_header -@@ -3263,15 +3264,154 @@ - [('Al Person', 'aperson@dom.ain'), - ('Bud Person', 'bperson@dom.ain')]) - -+ def test_getaddresses_comma_in_name(self): -+ """GH-106669 regression test.""" -+ self.assertEqual( -+ utils.getaddresses( -+ [ -+ '"Bud, Person" ', -+ 'aperson@dom.ain (Al Person)', -+ '"Mariusz Felisiak" ', -+ ] -+ ), -+ [ -+ ('Bud, Person', 'bperson@dom.ain'), -+ ('Al Person', 'aperson@dom.ain'), -+ ('Mariusz Felisiak', 'to@example.com'), -+ ], -+ ) -+ -+ def test_parsing_errors(self): -+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" -+ alice = 'alice@example.org' -+ bob = 'bob@example.com' -+ empty = ('', '') -+ -+ # Test utils.getaddresses() and utils.parseaddr() on malformed email -+ # addresses: default behavior (strict=True) rejects malformed address, -+ # and strict=False which tolerates malformed address. -+ for invalid_separator, expected_non_strict in ( -+ ('(', [(f'<{bob}>', alice)]), -+ (')', [('', alice), empty, ('', bob)]), -+ ('<', [('', alice), empty, ('', bob), empty]), -+ ('>', [('', alice), empty, ('', bob)]), -+ ('[', [('', f'{alice}[<{bob}>]')]), -+ (']', [('', alice), empty, ('', bob)]), -+ ('@', [empty, empty, ('', bob)]), -+ (';', [('', alice), empty, ('', bob)]), -+ (':', [('', alice), ('', bob)]), -+ ('.', [('', alice + '.'), ('', bob)]), -+ ('"', [('', alice), ('', f'<{bob}>')]), -+ ): -+ address = f'{alice}{invalid_separator}<{bob}>' -+ with self.subTest(address=address): -+ self.assertEqual(utils.getaddresses([address]), -+ [empty]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ expected_non_strict) -+ -+ self.assertEqual(utils.parseaddr([address]), -+ empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Comma (',') is treated differently depending on strict parameter. -+ # Comma without quotes. -+ address = f'{alice},<{bob}>' -+ self.assertEqual(utils.getaddresses([address]), -+ [('', alice), ('', bob)]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ [('', alice), ('', bob)]) -+ self.assertEqual(utils.parseaddr([address]), -+ empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Real name between quotes containing comma. -+ address = '"Alice, alice@example.org" ' -+ expected_strict = ('Alice, alice@example.org', 'bob@example.com') -+ self.assertEqual(utils.getaddresses([address]), [expected_strict]) -+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) -+ self.assertEqual(utils.parseaddr([address]), expected_strict) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Valid parenthesis in comments. -+ address = 'alice@example.org (Alice)' -+ expected_strict = ('Alice', 'alice@example.org') -+ self.assertEqual(utils.getaddresses([address]), [expected_strict]) -+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) -+ self.assertEqual(utils.parseaddr([address]), expected_strict) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Invalid parenthesis in comments. -+ address = 'alice@example.org )Alice(' -+ self.assertEqual(utils.getaddresses([address]), [empty]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) -+ self.assertEqual(utils.parseaddr([address]), empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Two addresses with quotes separated by comma. -+ address = '"Jane Doe" , "John Doe" ' -+ self.assertEqual(utils.getaddresses([address]), -+ [('Jane Doe', 'jane@example.net'), -+ ('John Doe', 'john@example.net')]) -+ self.assertEqual(utils.getaddresses([address], strict=False), -+ [('Jane Doe', 'jane@example.net'), -+ ('John Doe', 'john@example.net')]) -+ self.assertEqual(utils.parseaddr([address]), empty) -+ self.assertEqual(utils.parseaddr([address], strict=False), -+ ('', address)) -+ -+ # Test email.utils.supports_strict_parsing attribute -+ self.assertEqual(email.utils.supports_strict_parsing, True) -+ - def test_getaddresses_nasty(self): -- eq = self.assertEqual -- eq(utils.getaddresses(['foo: ;']), [('', '')]) -- eq(utils.getaddresses( -- ['[]*-- =~$']), -- [('', ''), ('', ''), ('', '*--')]) -- eq(utils.getaddresses( -- ['foo: ;', '"Jason R. Mastaler" ']), -- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) -+ for addresses, expected in ( -+ (['"Sürname, Firstname" '], -+ [('Sürname, Firstname', 'to@example.com')]), -+ -+ (['foo: ;'], -+ [('', '')]), -+ -+ (['foo: ;', '"Jason R. Mastaler" '], -+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), -+ -+ ([r'Pete(A nice \) chap) '], -+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), -+ -+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], -+ [('', '')]), -+ -+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], -+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), -+ -+ (['John Doe '], -+ [('John Doe (comment)', 'jdoe@machine.example')]), -+ -+ (['"Mary Smith: Personal Account" '], -+ [('Mary Smith: Personal Account', 'smith@home.example')]), -+ -+ (['Undisclosed recipients:;'], -+ [('', '')]), -+ -+ ([r', "Giant; \"Big\" Box" '], -+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), -+ ): -+ with self.subTest(addresses=addresses): -+ self.assertEqual(utils.getaddresses(addresses), -+ expected) -+ self.assertEqual(utils.getaddresses(addresses, strict=False), -+ expected) -+ -+ addresses = ['[]*-- =~$'] -+ self.assertEqual(utils.getaddresses(addresses), -+ [('', '')]) -+ self.assertEqual(utils.getaddresses(addresses, strict=False), -+ [('', ''), ('', ''), ('', '*--')]) - - def test_getaddresses_embedded_comment(self): - """Test proper handling of a nested comment""" -@@ -3460,6 +3600,54 @@ - m = cls(*constructor, policy=email.policy.default) - self.assertIs(m.policy, email.policy.default) - -+ def test_iter_escaped_chars(self): -+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), -+ [(0, 'a'), -+ (2, '\\\\'), -+ (3, 'b'), -+ (5, '\\"'), -+ (6, 'c'), -+ (8, '\\\\'), -+ (9, '"'), -+ (10, 'd')]) -+ self.assertEqual(list(utils._iter_escaped_chars('a\\')), -+ [(0, 'a'), (1, '\\')]) -+ -+ def test_strip_quoted_realnames(self): -+ def check(addr, expected): -+ self.assertEqual(utils._strip_quoted_realnames(addr), expected) -+ -+ check('"Jane Doe" , "John Doe" ', -+ ' , ') -+ check(r'"Jane \"Doe\"." ', -+ ' ') -+ -+ # special cases -+ check(r'before"name"after', 'beforeafter') -+ check(r'before"name"', 'before') -+ check(r'b"name"', 'b') # single char -+ check(r'"name"after', 'after') -+ check(r'"name"a', 'a') # single char -+ check(r'"name"', '') -+ -+ # no change -+ for addr in ( -+ 'Jane Doe , John Doe ', -+ 'lone " quote', -+ ): -+ self.assertEqual(utils._strip_quoted_realnames(addr), addr) -+ -+ -+ def test_check_parenthesis(self): -+ addr = 'alice@example.net' -+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) -+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) -+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) -+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) -+ -+ # Ignore real name between quotes -+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) -+ - - # Test the iterator/generators - class TestIterators(TestEmailBase): -diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py -index 89e7edeb63a..d29400f0ed1 100644 ---- a/Lib/test/test_email/test_generator.py -+++ b/Lib/test/test_email/test_generator.py -@@ -6,6 +6,7 @@ - from email.generator import Generator, BytesGenerator - from email.headerregistry import Address - from email import policy -+import email.errors - from test.test_email import TestEmailBase, parameterize - - -@@ -216,6 +217,44 @@ - g.flatten(msg) - self.assertEqual(s.getvalue(), self.typ(expected)) - -+ def test_keep_encoded_newlines(self): -+ msg = self.msgmaker(self.typ(textwrap.dedent("""\ -+ To: nobody -+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com -+ -+ None -+ """))) -+ expected = textwrap.dedent("""\ -+ To: nobody -+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com -+ -+ None -+ """) -+ s = self.ioclass() -+ g = self.genclass(s, policy=self.policy.clone(max_line_length=80)) -+ g.flatten(msg) -+ self.assertEqual(s.getvalue(), self.typ(expected)) -+ -+ def test_keep_long_encoded_newlines(self): -+ msg = self.msgmaker(self.typ(textwrap.dedent("""\ -+ To: nobody -+ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com -+ -+ None -+ """))) -+ expected = textwrap.dedent("""\ -+ To: nobody -+ Subject: Bad subject -+ =?utf-8?q?=0A?=Bcc: -+ injection@example.com -+ -+ None -+ """) -+ s = self.ioclass() -+ g = self.genclass(s, policy=self.policy.clone(max_line_length=30)) -+ g.flatten(msg) -+ self.assertEqual(s.getvalue(), self.typ(expected)) -+ - - class TestGenerator(TestGeneratorBase, TestEmailBase): - -@@ -224,6 +263,29 @@ - ioclass = io.StringIO - typ = str - -+ def test_verify_generated_headers(self): -+ """gh-121650: by default the generator prevents header injection""" -+ class LiteralHeader(str): -+ name = 'Header' -+ def fold(self, **kwargs): -+ return self -+ -+ for text in ( -+ 'Value\r\nBad Injection\r\n', -+ 'NoNewLine' -+ ): -+ with self.subTest(text=text): -+ message = message_from_string( -+ "Header: Value\r\n\r\nBody", -+ policy=self.policy, -+ ) -+ -+ del message['Header'] -+ message['Header'] = LiteralHeader(text) -+ -+ with self.assertRaises(email.errors.HeaderWriteError): -+ message.as_string() -+ - - class TestBytesGenerator(TestGeneratorBase, TestEmailBase): - -diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py -index e87c2755494..ff1ddf7d7a8 100644 ---- a/Lib/test/test_email/test_policy.py -+++ b/Lib/test/test_email/test_policy.py -@@ -26,6 +26,7 @@ - 'raise_on_defect': False, - 'mangle_from_': True, - 'message_factory': None, -+ 'verify_generated_headers': True, - } - # These default values are the ones set on email.policy.default. - # If any of these defaults change, the docs must be updated. -@@ -277,6 +278,31 @@ - with self.assertRaises(email.errors.HeaderParseError): - policy.fold("Subject", subject) - -+ def test_verify_generated_headers(self): -+ """Turning protection off allows header injection""" -+ policy = email.policy.default.clone(verify_generated_headers=False) -+ for text in ( -+ 'Header: Value\r\nBad: Injection\r\n', -+ 'Header: NoNewLine' -+ ): -+ with self.subTest(text=text): -+ message = email.message_from_string( -+ "Header: Value\r\n\r\nBody", -+ policy=policy, -+ ) -+ class LiteralHeader(str): -+ name = 'Header' -+ def fold(self, **kwargs): -+ return self -+ -+ del message['Header'] -+ message['Header'] = LiteralHeader(text) -+ -+ self.assertEqual( -+ message.as_string(), -+ f"{text}\nBody", -+ ) -+ - # XXX: Need subclassing tests. - # For adding subclassed objects, make sure the usual rules apply (subclass - # wins), but that the order still works (right overrides left). diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 8e5cfa4824b..5311ba912c3 100644 --- a/Lib/test/test_embed.py @@ -3511,62 +1870,6 @@ index 1d41efa6c1f..01603382af5 100644 @create_and_remove_directory(TEMPDIR) def test_compress_stdin_outfile(self): args = sys.executable, '-m', 'gzip' -diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py -index 6072c7e15e9..644e75cd5b7 100644 ---- a/Lib/test/test_http_cookies.py -+++ b/Lib/test/test_http_cookies.py -@@ -5,6 +5,7 @@ - import unittest - from http import cookies - import pickle -+from test import support - - - class CookieTests(unittest.TestCase): -@@ -58,6 +59,43 @@ - for k, v in sorted(case['dict'].items()): - self.assertEqual(C[k].value, v) - -+ def test_unquote(self): -+ cases = [ -+ (r'a="b=\""', 'b="'), -+ (r'a="b=\\"', 'b=\\'), -+ (r'a="b=\="', 'b=='), -+ (r'a="b=\n"', 'b=n'), -+ (r'a="b=\042"', 'b="'), -+ (r'a="b=\134"', 'b=\\'), -+ (r'a="b=\377"', 'b=\xff'), -+ (r'a="b=\400"', 'b=400'), -+ (r'a="b=\42"', 'b=42'), -+ (r'a="b=\\042"', 'b=\\042'), -+ (r'a="b=\\134"', 'b=\\134'), -+ (r'a="b=\\\""', 'b=\\"'), -+ (r'a="b=\\\042"', 'b=\\"'), -+ (r'a="b=\134\""', 'b=\\"'), -+ (r'a="b=\134\042"', 'b=\\"'), -+ ] -+ for encoded, decoded in cases: -+ with self.subTest(encoded): -+ C = cookies.SimpleCookie() -+ C.load(encoded) -+ self.assertEqual(C['a'].value, decoded) -+ -+ @support.requires_resource('cpu') -+ def test_unquote_large(self): -+ n = 10**6 -+ for encoded in r'\\', r'\134': -+ with self.subTest(encoded): -+ data = 'a="b=' + encoded*n + ';"' -+ C = cookies.SimpleCookie() -+ C.load(data) -+ value = C['a'].value -+ self.assertEqual(value[:3], 'b=\\') -+ self.assertEqual(value[-2:], '\\;') -+ self.assertEqual(len(value), n + 3) -+ - def test_load(self): - C = cookies.SimpleCookie() - C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme') diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index b70a109c27c..5a781b5f46c 100644 --- a/Lib/test/test_httpservers.py @@ -3901,83 +2204,6 @@ index feee861830e..1d5c61ada42 100644 support.requires( 'largefile', 'test requires %s bytes and a long time to run' % self.LARGE) -diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py -index 90897f6bedb..bd14f04f6c6 100644 ---- a/Lib/test/test_ipaddress.py -+++ b/Lib/test/test_ipaddress.py -@@ -2263,6 +2263,10 @@ - self.assertEqual(True, ipaddress.ip_address( - '172.31.255.255').is_private) - self.assertEqual(False, ipaddress.ip_address('172.32.0.0').is_private) -+ self.assertFalse(ipaddress.ip_address('192.0.0.0').is_global) -+ self.assertTrue(ipaddress.ip_address('192.0.0.9').is_global) -+ self.assertTrue(ipaddress.ip_address('192.0.0.10').is_global) -+ self.assertFalse(ipaddress.ip_address('192.0.0.255').is_global) - - self.assertEqual(True, - ipaddress.ip_address('169.254.100.200').is_link_local) -@@ -2278,6 +2282,40 @@ - self.assertEqual(False, ipaddress.ip_address('128.0.0.0').is_loopback) - self.assertEqual(True, ipaddress.ip_network('0.0.0.0').is_unspecified) - -+ def testPrivateNetworks(self): -+ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/0").is_private) -+ self.assertEqual(False, ipaddress.ip_network("1.0.0.0/8").is_private) -+ -+ self.assertEqual(True, ipaddress.ip_network("0.0.0.0/8").is_private) -+ self.assertEqual(True, ipaddress.ip_network("10.0.0.0/8").is_private) -+ self.assertEqual(True, ipaddress.ip_network("127.0.0.0/8").is_private) -+ self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private) -+ self.assertEqual(True, ipaddress.ip_network("172.16.0.0/12").is_private) -+ self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private) -+ self.assertEqual(False, ipaddress.ip_network("192.0.0.9/32").is_private) -+ self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private) -+ self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private) -+ self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private) -+ self.assertEqual(True, ipaddress.ip_network("198.18.0.0/15").is_private) -+ self.assertEqual(True, ipaddress.ip_network("198.51.100.0/24").is_private) -+ self.assertEqual(True, ipaddress.ip_network("203.0.113.0/24").is_private) -+ self.assertEqual(True, ipaddress.ip_network("240.0.0.0/4").is_private) -+ self.assertEqual(True, ipaddress.ip_network("255.255.255.255/32").is_private) -+ -+ self.assertEqual(False, ipaddress.ip_network("::/0").is_private) -+ self.assertEqual(False, ipaddress.ip_network("::ff/128").is_private) -+ -+ self.assertEqual(True, ipaddress.ip_network("::1/128").is_private) -+ self.assertEqual(True, ipaddress.ip_network("::/128").is_private) -+ self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private) -+ self.assertEqual(True, ipaddress.ip_network("100::/64").is_private) -+ self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private) -+ self.assertEqual(False, ipaddress.ip_network("2001:3::/48").is_private) -+ self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private) -+ self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private) -+ self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private) -+ self.assertEqual(True, ipaddress.ip_network("fe80::/10").is_private) -+ - def testReservedIpv6(self): - - self.assertEqual(True, ipaddress.ip_network('ffff::').is_multicast) -@@ -2351,6 +2389,20 @@ - self.assertEqual(True, ipaddress.ip_address('0::0').is_unspecified) - self.assertEqual(False, ipaddress.ip_address('::1').is_unspecified) - -+ self.assertFalse(ipaddress.ip_address('64:ff9b:1::').is_global) -+ self.assertFalse(ipaddress.ip_address('2001::').is_global) -+ self.assertTrue(ipaddress.ip_address('2001:1::1').is_global) -+ self.assertTrue(ipaddress.ip_address('2001:1::2').is_global) -+ self.assertFalse(ipaddress.ip_address('2001:2::').is_global) -+ self.assertTrue(ipaddress.ip_address('2001:3::').is_global) -+ self.assertFalse(ipaddress.ip_address('2001:4::').is_global) -+ self.assertTrue(ipaddress.ip_address('2001:4:112::').is_global) -+ self.assertFalse(ipaddress.ip_address('2001:10::').is_global) -+ self.assertTrue(ipaddress.ip_address('2001:20::').is_global) -+ self.assertTrue(ipaddress.ip_address('2001:30::').is_global) -+ self.assertFalse(ipaddress.ip_address('2001:40::').is_global) -+ self.assertFalse(ipaddress.ip_address('2002::').is_global) -+ - # some generic IETF reserved addresses - self.assertEqual(True, ipaddress.ip_address('100::').is_reserved) - self.assertEqual(True, ipaddress.ip_network('4000::1/128').is_reserved) diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py index d441bb15a7e..75e4b84fea7 100644 --- a/Lib/test/test_json/test_tool.py @@ -4155,7 +2381,7 @@ index 6558952308f..9c3901a63f3 100644 if __name__ == '__main__': diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py -index 78dd3151b36..e1c02ca4984 100644 +index f8b4e091e08..e1c02ca4984 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -31,6 +31,8 @@ @@ -4175,26 +2401,7 @@ index 78dd3151b36..e1c02ca4984 100644 def test_putenv_unsetenv(self): name = "PYTHONTESTVAR" value = "testvalue" -@@ -1493,6 +1496,18 @@ - self.assertRaises(OSError, os.makedirs, path, exist_ok=True) - os.remove(path) - -+ @unittest.skipUnless(os.name == 'nt', "requires Windows") -+ def test_win32_mkdir_700(self): -+ base = support.TESTFN -+ path = os.path.abspath(os.path.join(support.TESTFN, 'dir')) -+ os.mkdir(path, mode=0o700) -+ out = subprocess.check_output(["cacls.exe", path, "/s"], encoding="oem") -+ os.rmdir(path) -+ self.assertEqual( -+ out.strip(), -+ f'{path} "D:P(A;OICI;FA;;;SY)(A;OICI;FA;;;BA)(A;OICI;FA;;;OW)"', -+ ) -+ - def tearDown(self): - path = os.path.join(support.TESTFN, 'dir1', 'dir2', 'dir3', - 'dir4', 'dir5', 'dir6') -@@ -2007,6 +2022,7 @@ +@@ -2019,6 +2022,7 @@ self.check(os.fchown, -1, -1) @unittest.skipUnless(hasattr(os, 'fpathconf'), 'test needs os.fpathconf()') @@ -4202,7 +2409,7 @@ index 78dd3151b36..e1c02ca4984 100644 def test_fpathconf(self): self.check(os.pathconf, "PC_NAME_MAX") self.check(os.fpathconf, "PC_NAME_MAX") -@@ -2133,6 +2149,7 @@ +@@ -2145,6 +2149,7 @@ self.assertRaises(OverflowError, os.setreuid, 0, self.UID_OVERFLOW) @unittest.skipUnless(hasattr(os, 'setreuid'), 'test needs os.setreuid()') @@ -4210,7 +2417,7 @@ index 78dd3151b36..e1c02ca4984 100644 def test_setreuid_neg1(self): # Needs to accept -1. We run this in a subprocess to avoid # altering the test runner's process state (issue8045). -@@ -2150,6 +2167,7 @@ +@@ -2162,6 +2167,7 @@ self.assertRaises(OverflowError, os.setregid, 0, self.GID_OVERFLOW) @unittest.skipUnless(hasattr(os, 'setregid'), 'test needs os.setregid()') @@ -4218,7 +2425,7 @@ index 78dd3151b36..e1c02ca4984 100644 def test_setregid_neg1(self): # Needs to accept -1. We run this in a subprocess to avoid # altering the test runner's process state (issue8045). -@@ -2822,6 +2840,7 @@ +@@ -2834,6 +2840,7 @@ class PidTests(unittest.TestCase): @unittest.skipUnless(hasattr(os, 'getppid'), "test needs os.getppid") @@ -4226,7 +2433,7 @@ index 78dd3151b36..e1c02ca4984 100644 def test_getppid(self): p = subprocess.Popen([sys.executable, '-c', 'import os; print(os.getppid())'], -@@ -2830,6 +2849,7 @@ +@@ -2842,6 +2849,7 @@ # We are the parent of our subprocess self.assertEqual(int(stdout), os.getpid()) @@ -4234,7 +2441,7 @@ index 78dd3151b36..e1c02ca4984 100644 def check_waitpid(self, code, exitcode, callback=None): if sys.platform == 'win32': # On Windows, os.spawnv() simply joins arguments with spaces: -@@ -2892,6 +2912,7 @@ +@@ -2904,6 +2912,7 @@ self.check_waitpid(code, exitcode=-signum, callback=kill_process) @@ -4242,7 +2449,7 @@ index 78dd3151b36..e1c02ca4984 100644 class SpawnTests(unittest.TestCase): def create_args(self, *, with_env=False, use_bytes=False): self.exitcode = 17 -@@ -2974,6 +2995,7 @@ +@@ -2986,6 +2995,7 @@ self.assertEqual(exitcode, self.exitcode) @requires_os_func('spawnv') @@ -4250,7 +2457,7 @@ index 78dd3151b36..e1c02ca4984 100644 def test_nowait(self): args = self.create_args() pid = os.spawnv(os.P_NOWAIT, args[0], args) -@@ -3517,6 +3539,7 @@ +@@ -3529,6 +3539,7 @@ self.assertGreaterEqual(size.columns, 0) self.assertGreaterEqual(size.lines, 0) @@ -4674,34 +2881,6 @@ index c734f0d6ce2..c5c68916269 100644 def assertSigInt(self, *args, **kwargs): proc = subprocess.run(*args, **kwargs, text=True, stderr=subprocess.PIPE) self.assertTrue(proc.stderr.endswith("\nKeyboardInterrupt\n")) -diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py -index e6b9349fcdd..41f51f0c945 100644 ---- a/Lib/test/test_sax.py -+++ b/Lib/test/test_sax.py -@@ -1211,10 +1211,10 @@ - - self.assertEqual(result.getvalue(), start + b"text") - -+ @unittest.skipIf(pyexpat.version_info < (2, 6, 0), -+ f'Expat {pyexpat.version_info} does not ' -+ 'support reparse deferral') - def test_flush_reparse_deferral_enabled(self): -- if pyexpat.version_info < (2, 6, 0): -- self.skipTest(f'Expat {pyexpat.version_info} does not support reparse deferral') -- - result = BytesIO() - xmlgen = XMLGenerator(result) - parser = create_parser() -@@ -1247,8 +1247,8 @@ - - if pyexpat.version_info >= (2, 6, 0): - parser._parser.SetReparseDeferralEnabled(False) -+ self.assertEqual(result.getvalue(), start) # i.e. no elements started - -- self.assertEqual(result.getvalue(), start) # i.e. no elements started - self.assertFalse(parser._parser.GetReparseDeferralEnabled()) - - parser.flush() diff --git a/Lib/test/test_select.py b/Lib/test/test_select.py index 458998a62fd..ffdad07a0f9 100644 --- a/Lib/test/test_select.py @@ -4903,7 +3082,7 @@ index e578cd7db3d..11b7b395a5e 100644 libpath = os.path.dirname(os.path.dirname(encodings.__file__)) exe_prefix = os.path.dirname(sys.executable) diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py -index 043e5543889..87c4fed1461 100755 +index b36cb5beaec..87c4fed1461 100755 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -1,6 +1,7 @@ @@ -4914,38 +3093,7 @@ index 043e5543889..87c4fed1461 100755 import errno import io -@@ -555,19 +556,27 @@ - def __init__(self, methodName='runTest'): - unittest.TestCase.__init__(self, methodName=methodName) - ThreadableTest.__init__(self) -+ self.cli = None -+ self.serv = None -+ -+ def socketpair(self): -+ # To be overridden by some child classes. -+ return socket.socketpair() - - def setUp(self): -- self.serv, self.cli = socket.socketpair() -+ self.serv, self.cli = self.socketpair() - - def tearDown(self): -- self.serv.close() -+ if self.serv: -+ self.serv.close() - self.serv = None - - def clientSetUp(self): - pass - - def clientTearDown(self): -- self.cli.close() -+ if self.cli: -+ self.cli.close() - self.cli = None - ThreadableTest.clientTearDown(self) - -@@ -690,7 +699,7 @@ +@@ -698,7 +699,7 @@ super().setUp() def bindSock(self, sock): @@ -4954,7 +3102,7 @@ index 043e5543889..87c4fed1461 100755 socket_helper.bind_unix_socket(sock, path) self.addCleanup(support.unlink, path) -@@ -1155,8 +1164,11 @@ +@@ -1163,8 +1164,11 @@ # Find one service that exists, then check all the related interfaces. # I've ordered this by protocols that have both a tcp and udp # protocol, at least for modern Linuxes. @@ -4968,7 +3116,7 @@ index 043e5543889..87c4fed1461 100755 # avoid the 'echo' service on this platform, as there is an # assumption breaking non-standard port/protocol entry services = ('daytime', 'qotd', 'domain') -@@ -1896,12 +1908,13 @@ +@@ -1904,12 +1908,13 @@ self._test_socket_fileno(s, socket.AF_INET6, socket.SOCK_STREAM) if hasattr(socket, "AF_UNIX"): @@ -4985,7 +3133,7 @@ index 043e5543889..87c4fed1461 100755 except PermissionError: pass else: -@@ -3509,7 +3522,7 @@ +@@ -3517,7 +3522,7 @@ def _testFDPassCMSG_LEN(self): self.createAndSendFDs(1) @@ -4994,7 +3142,7 @@ index 043e5543889..87c4fed1461 100755 @unittest.skipIf(AIX, "skipping, see issue #22397") @requireAttrs(socket, "CMSG_SPACE") def testFDPassSeparate(self): -@@ -3520,7 +3533,7 @@ +@@ -3528,7 +3533,7 @@ maxcmsgs=2) @testFDPassSeparate.client_skip @@ -5003,7 +3151,7 @@ index 043e5543889..87c4fed1461 100755 @unittest.skipIf(AIX, "skipping, see issue #22397") def _testFDPassSeparate(self): fd0, fd1 = self.newFDs(2) -@@ -3533,7 +3546,7 @@ +@@ -3541,7 +3546,7 @@ array.array("i", [fd1]))]), len(MSG)) @@ -5012,7 +3160,7 @@ index 043e5543889..87c4fed1461 100755 @unittest.skipIf(AIX, "skipping, see issue #22397") @requireAttrs(socket, "CMSG_SPACE") def testFDPassSeparateMinSpace(self): -@@ -3547,7 +3560,7 @@ +@@ -3555,7 +3560,7 @@ maxcmsgs=2, ignoreflags=socket.MSG_CTRUNC) @testFDPassSeparateMinSpace.client_skip @@ -5021,7 +3169,7 @@ index 043e5543889..87c4fed1461 100755 @unittest.skipIf(AIX, "skipping, see issue #22397") def _testFDPassSeparateMinSpace(self): fd0, fd1 = self.newFDs(2) -@@ -3571,7 +3584,7 @@ +@@ -3579,7 +3584,7 @@ nbytes = self.sendmsgToServer([msg]) self.assertEqual(nbytes, len(msg)) @@ -5030,119 +3178,6 @@ index 043e5543889..87c4fed1461 100755 def testFDPassEmpty(self): # Try to pass an empty FD array. Can receive either no array # or an empty array. -@@ -4613,6 +4626,112 @@ - self.assertEqual(msg, MSG) - - -+class PurePythonSocketPairTest(SocketPairTest): -+ # Explicitly use socketpair AF_INET or AF_INET6 to ensure that is the -+ # code path we're using regardless platform is the pure python one where -+ # `_socket.socketpair` does not exist. (AF_INET does not work with -+ # _socket.socketpair on many platforms). -+ def socketpair(self): -+ # called by super().setUp(). -+ try: -+ return socket.socketpair(socket.AF_INET6) -+ except OSError: -+ return socket.socketpair(socket.AF_INET) -+ -+ # Local imports in this class make for easy security fix backporting. -+ -+ def setUp(self): -+ if hasattr(_socket, "socketpair"): -+ self._orig_sp = socket.socketpair -+ # This forces the version using the non-OS provided socketpair -+ # emulation via an AF_INET socket in Lib/socket.py. -+ socket.socketpair = socket._fallback_socketpair -+ else: -+ # This platform already uses the non-OS provided version. -+ self._orig_sp = None -+ super().setUp() -+ -+ def tearDown(self): -+ super().tearDown() -+ if self._orig_sp is not None: -+ # Restore the default socket.socketpair definition. -+ socket.socketpair = self._orig_sp -+ -+ def test_recv(self): -+ msg = self.serv.recv(1024) -+ self.assertEqual(msg, MSG) -+ -+ def _test_recv(self): -+ self.cli.send(MSG) -+ -+ def test_send(self): -+ self.serv.send(MSG) -+ -+ def _test_send(self): -+ msg = self.cli.recv(1024) -+ self.assertEqual(msg, MSG) -+ -+ def test_ipv4(self): -+ cli, srv = socket.socketpair(socket.AF_INET) -+ cli.close() -+ srv.close() -+ -+ def _test_ipv4(self): -+ pass -+ -+ @unittest.skipIf(not hasattr(_socket, 'IPPROTO_IPV6') or -+ not hasattr(_socket, 'IPV6_V6ONLY'), -+ "IPV6_V6ONLY option not supported") -+ @unittest.skipUnless(socket_helper.IPV6_ENABLED, 'IPv6 required for this test') -+ def test_ipv6(self): -+ cli, srv = socket.socketpair(socket.AF_INET6) -+ cli.close() -+ srv.close() -+ -+ def _test_ipv6(self): -+ pass -+ -+ def test_injected_authentication_failure(self): -+ orig_getsockname = socket.socket.getsockname -+ inject_sock = None -+ -+ def inject_getsocketname(self): -+ nonlocal inject_sock -+ sockname = orig_getsockname(self) -+ # Connect to the listening socket ahead of the -+ # client socket. -+ if inject_sock is None: -+ inject_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -+ inject_sock.setblocking(False) -+ try: -+ inject_sock.connect(sockname[:2]) -+ except (BlockingIOError, InterruptedError): -+ pass -+ inject_sock.setblocking(True) -+ return sockname -+ -+ sock1 = sock2 = None -+ try: -+ socket.socket.getsockname = inject_getsocketname -+ with self.assertRaises(OSError): -+ sock1, sock2 = socket.socketpair() -+ finally: -+ socket.socket.getsockname = orig_getsockname -+ if inject_sock: -+ inject_sock.close() -+ if sock1: # This cleanup isn't needed on a successful test. -+ sock1.close() -+ if sock2: -+ sock2.close() -+ -+ def _test_injected_authentication_failure(self): -+ # No-op. Exists for base class threading infrastructure to call. -+ # We could refactor this test into its own lesser class along with the -+ # setUp and tearDown code to construct an ideal; it is simpler to keep -+ # it here and live with extra overhead one this _one_ failure test. -+ pass -+ -+ - class NonBlockingTCPTests(ThreadedTCPSocketTest): - - def __init__(self, methodName='runTest'): diff --git a/Lib/test/test_socketserver.py b/Lib/test/test_socketserver.py index c9455adfd85..88fc1be1a41 100644 --- a/Lib/test/test_socketserver.py @@ -5343,72 +3378,11 @@ index 0ca5c9390db..63c452d35e4 100644 def test_get_makefile_filename(self): makefile = sysconfig.get_makefile_filename() self.assertTrue(os.path.isfile(makefile), makefile) -diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py -index 3df64c78032..2218401e386 100644 ---- a/Lib/test/test_tarfile.py -+++ b/Lib/test/test_tarfile.py -@@ -1113,6 +1113,48 @@ - finally: - tar.close() - -+ def test_pax_header_bad_formats(self): -+ # The fields from the pax header have priority over the -+ # TarInfo. -+ pax_header_replacements = ( -+ b" foo=bar\n", -+ b"0 \n", -+ b"1 \n", -+ b"2 \n", -+ b"3 =\n", -+ b"4 =a\n", -+ b"1000000 foo=bar\n", -+ b"0 foo=bar\n", -+ b"-12 foo=bar\n", -+ b"000000000000000000000000036 foo=bar\n", -+ ) -+ pax_headers = {"foo": "bar"} -+ -+ for replacement in pax_header_replacements: -+ with self.subTest(header=replacement): -+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, -+ encoding="iso8859-1") -+ try: -+ t = tarfile.TarInfo() -+ t.name = "pax" # non-ASCII -+ t.uid = 1 -+ t.pax_headers = pax_headers -+ tar.addfile(t) -+ finally: -+ tar.close() -+ -+ with open(tmpname, "rb") as f: -+ data = f.read() -+ self.assertIn(b"11 foo=bar\n", data) -+ data = data.replace(b"11 foo=bar\n", replacement) -+ -+ with open(tmpname, "wb") as f: -+ f.truncate() -+ f.write(data) -+ -+ with self.assertRaisesRegex(tarfile.ReadError, r"file could not be opened successfully"): -+ tarfile.open(tmpname, encoding="iso8859-1") -+ - - class WriteTestBase(TarTest): - # Put all write tests in here that are supposed to be tested diff --git a/Lib/test/test_tempfile.py b/Lib/test/test_tempfile.py -index 571263d9c95..79fb4e3e95a 100644 +index 8d13c22f2ee..79fb4e3e95a 100644 --- a/Lib/test/test_tempfile.py +++ b/Lib/test/test_tempfile.py -@@ -11,6 +11,7 @@ - import stat - import types - import weakref -+import subprocess - from unittest import mock - - import unittest -@@ -198,6 +199,7 @@ +@@ -199,6 +199,7 @@ @unittest.skipUnless(hasattr(os, 'fork'), "os.fork is required for this test") @@ -5416,7 +3390,7 @@ index 571263d9c95..79fb4e3e95a 100644 def test_process_awareness(self): # ensure that the random source differs between # child and parent. -@@ -464,6 +466,7 @@ +@@ -465,6 +466,7 @@ self.assertEqual(mode, expected) @unittest.skipUnless(has_spawnl, 'os.spawnl not available') @@ -5424,40 +3398,6 @@ index 571263d9c95..79fb4e3e95a 100644 def test_noinherit(self): # _mkstemp_inner file handles are not inherited by child processes -@@ -772,6 +775,33 @@ - finally: - os.rmdir(dir) - -+ @unittest.skipUnless(os.name == "nt", "Only on Windows.") -+ def test_mode_win32(self): -+ # Use icacls.exe to extract the users with some level of access -+ # Main thing we are testing is that the BUILTIN\Users group has -+ # no access. The exact ACL is going to vary based on which user -+ # is running the test. -+ dir = self.do_create() -+ try: -+ out = subprocess.check_output(["icacls.exe", dir], encoding="oem").casefold() -+ finally: -+ os.rmdir(dir) -+ -+ dir = dir.casefold() -+ users = set() -+ found_user = False -+ for line in out.strip().splitlines(): -+ acl = None -+ # First line of result includes our directory -+ if line.startswith(dir): -+ acl = line.removeprefix(dir).strip() -+ elif line and line[:1].isspace(): -+ acl = line.strip() -+ if acl: -+ users.add(acl.partition(":")[0]) -+ -+ self.assertNotIn(r"BUILTIN\Users".casefold(), users) -+ - def test_collision_with_existing_file(self): - # mkdtemp tries another name when a file with - # the chosen name already exists diff --git a/Lib/test/test_thread.py b/Lib/test/test_thread.py index 3f8a539f85e..a47df45e943 100644 --- a/Lib/test/test_thread.py @@ -5561,118 +3501,6 @@ index bcd1dafa5c7..3ac5ed8a3fd 100644 def test_http_body_pipe(self): # A file reading from a pipe. # A pipe cannot be seek'ed. There is no way to determine the -diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py -index 574da5bd696..b862711e414 100644 ---- a/Lib/test/test_urlparse.py -+++ b/Lib/test/test_urlparse.py -@@ -70,7 +70,9 @@ - - class UrlParseTestCase(unittest.TestCase): - -- def checkRoundtrips(self, url, parsed, split): -+ def checkRoundtrips(self, url, parsed, split, url2=None): -+ if url2 is None: -+ url2 = url - result = urllib.parse.urlparse(url) - self.assertEqual(result, parsed) - t = (result.scheme, result.netloc, result.path, -@@ -78,7 +80,7 @@ - self.assertEqual(t, parsed) - # put it back together and it should be the same - result2 = urllib.parse.urlunparse(result) -- self.assertEqual(result2, url) -+ self.assertEqual(result2, url2) - self.assertEqual(result2, result.geturl()) - - # the result of geturl() is a fixpoint; we can always parse it -@@ -104,7 +106,7 @@ - result.query, result.fragment) - self.assertEqual(t, split) - result2 = urllib.parse.urlunsplit(result) -- self.assertEqual(result2, url) -+ self.assertEqual(result2, url2) - self.assertEqual(result2, result.geturl()) - - # check the fixpoint property of re-parsing the result of geturl() -@@ -142,9 +144,39 @@ - - def test_roundtrips(self): - str_cases = [ -+ ('path/to/file', -+ ('', '', 'path/to/file', '', '', ''), -+ ('', '', 'path/to/file', '', '')), -+ ('/path/to/file', -+ ('', '', '/path/to/file', '', '', ''), -+ ('', '', '/path/to/file', '', '')), -+ ('//path/to/file', -+ ('', 'path', '/to/file', '', '', ''), -+ ('', 'path', '/to/file', '', '')), -+ ('////path/to/file', -+ ('', '', '//path/to/file', '', '', ''), -+ ('', '', '//path/to/file', '', '')), -+ ('scheme:path/to/file', -+ ('scheme', '', 'path/to/file', '', '', ''), -+ ('scheme', '', 'path/to/file', '', '')), -+ ('scheme:/path/to/file', -+ ('scheme', '', '/path/to/file', '', '', ''), -+ ('scheme', '', '/path/to/file', '', '')), -+ ('scheme://path/to/file', -+ ('scheme', 'path', '/to/file', '', '', ''), -+ ('scheme', 'path', '/to/file', '', '')), -+ ('scheme:////path/to/file', -+ ('scheme', '', '//path/to/file', '', '', ''), -+ ('scheme', '', '//path/to/file', '', '')), - ('file:///tmp/junk.txt', - ('file', '', '/tmp/junk.txt', '', '', ''), - ('file', '', '/tmp/junk.txt', '', '')), -+ ('file:////tmp/junk.txt', -+ ('file', '', '//tmp/junk.txt', '', '', ''), -+ ('file', '', '//tmp/junk.txt', '', '')), -+ ('file://///tmp/junk.txt', -+ ('file', '', '///tmp/junk.txt', '', '', ''), -+ ('file', '', '///tmp/junk.txt', '', '')), - ('imap://mail.python.org/mbox1', - ('imap', 'mail.python.org', '/mbox1', '', '', ''), - ('imap', 'mail.python.org', '/mbox1', '', '')), -@@ -175,6 +207,38 @@ - for url, parsed, split in str_cases + bytes_cases: - self.checkRoundtrips(url, parsed, split) - -+ def test_roundtrips_normalization(self): -+ str_cases = [ -+ ('///path/to/file', -+ '/path/to/file', -+ ('', '', '/path/to/file', '', '', ''), -+ ('', '', '/path/to/file', '', '')), -+ ('scheme:///path/to/file', -+ 'scheme:/path/to/file', -+ ('scheme', '', '/path/to/file', '', '', ''), -+ ('scheme', '', '/path/to/file', '', '')), -+ ('file:/tmp/junk.txt', -+ 'file:///tmp/junk.txt', -+ ('file', '', '/tmp/junk.txt', '', '', ''), -+ ('file', '', '/tmp/junk.txt', '', '')), -+ ('http:/tmp/junk.txt', -+ 'http:///tmp/junk.txt', -+ ('http', '', '/tmp/junk.txt', '', '', ''), -+ ('http', '', '/tmp/junk.txt', '', '')), -+ ('https:/tmp/junk.txt', -+ 'https:///tmp/junk.txt', -+ ('https', '', '/tmp/junk.txt', '', '', ''), -+ ('https', '', '/tmp/junk.txt', '', '')), -+ ] -+ def _encode(t): -+ return (t[0].encode('ascii'), -+ t[1].encode('ascii'), -+ tuple(x.encode('ascii') for x in t[2]), -+ tuple(x.encode('ascii') for x in t[3])) -+ bytes_cases = [_encode(x) for x in str_cases] -+ for url, url2, parsed, split in str_cases + bytes_cases: -+ self.checkRoundtrips(url, parsed, split, url2) -+ - def test_http_roundtrips(self): - # urllib.parse.urlsplit treats 'http:' as an optimized special case, - # so we test both 'http:' and 'https:' in all the following. diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index 480cb29f35a..c5d1cfa49c3 100644 --- a/Lib/test/test_venv.py @@ -5832,37 +3660,8 @@ index 519a9432abe..0788ec4c6b9 100644 def test_environment_preferred(self): webbrowser = support.import_fresh_module('webbrowser') try: -diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py -index a8114448488..90bc33c37ac 100644 ---- a/Lib/test/test_xml_etree.py -+++ b/Lib/test/test_xml_etree.py -@@ -1619,11 +1619,10 @@ - with self.assertRaises(ValueError): - ET.XMLPullParser(events=('start', 'end', 'bogus')) - -+ @unittest.skipIf(pyexpat.version_info < (2, 6, 0), -+ f'Expat {pyexpat.version_info} does not ' -+ 'support reparse deferral') - def test_flush_reparse_deferral_enabled(self): -- if pyexpat.version_info < (2, 6, 0): -- self.skipTest(f'Expat {pyexpat.version_info} does not ' -- 'support reparse deferral') -- - parser = ET.XMLPullParser(events=('start', 'end')) - - for chunk in (""): -@@ -1655,8 +1654,8 @@ - self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled ' - 'methods not available in C') - parser._parser._parser.SetReparseDeferralEnabled(False) -+ self.assert_event_tags(parser, []) # i.e. no elements started - -- self.assert_event_tags(parser, []) # i.e. no elements started - if ET is pyET: - self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) - diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py -index 17e95eb8623..bc89288ee9f 100644 +index 5809b2c0006..bc89288ee9f 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -19,7 +19,7 @@ @@ -5890,90 +3689,6 @@ index 17e95eb8623..bc89288ee9f 100644 def test_execute_zip64(self): output = subprocess.check_output([self.exe_zip64, sys.executable]) self.assertIn(b'number in executable: 5', output) -@@ -3054,6 +3056,83 @@ - data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)] - zipfile.CompleteDirs._implied_dirs(data) - -+ def test_malformed_paths(self): -+ """ -+ Path should handle malformed paths gracefully. -+ -+ Paths with leading slashes are not visible. -+ -+ Paths with dots are treated like regular files. -+ """ -+ data = io.BytesIO() -+ zf = zipfile.ZipFile(data, "w") -+ zf.writestr("/one-slash.txt", b"content") -+ zf.writestr("//two-slash.txt", b"content") -+ zf.writestr("../parent.txt", b"content") -+ zf.filename = '' -+ root = zipfile.Path(zf) -+ assert list(map(str, root.iterdir())) == ['../'] -+ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' -+ -+ def test_unsupported_names(self): -+ """ -+ Path segments with special characters are readable. -+ -+ On some platforms or file systems, characters like -+ ``:`` and ``?`` are not allowed, but they are valid -+ in the zip file. -+ """ -+ data = io.BytesIO() -+ zf = zipfile.ZipFile(data, "w") -+ zf.writestr("path?", b"content") -+ zf.writestr("V: NMS.flac", b"fLaC...") -+ zf.filename = '' -+ root = zipfile.Path(zf) -+ contents = root.iterdir() -+ assert next(contents).name == 'path?' -+ assert next(contents).name == 'V: NMS.flac' -+ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." -+ -+ def test_backslash_not_separator(self): -+ """ -+ In a zip file, backslashes are not separators. -+ """ -+ data = io.BytesIO() -+ zf = zipfile.ZipFile(data, "w") -+ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") -+ zf.filename = '' -+ root = zipfile.Path(zf) -+ (first,) = root.iterdir() -+ assert not first.is_dir() -+ assert first.name == 'foo\\bar' -+ -+ -+class DirtyZipInfo(zipfile.ZipInfo): -+ """ -+ Bypass name sanitization. -+ """ -+ -+ def __init__(self, filename, *args, **kwargs): -+ super().__init__(filename, *args, **kwargs) -+ self.filename = filename -+ -+ @classmethod -+ def for_name(cls, name, archive): -+ """ -+ Construct the same way that ZipFile.writestr does. -+ -+ TODO: extract this functionality and re-use -+ """ -+ self = cls(filename=name, date_time=time.localtime(time.time())[:6]) -+ self.compress_type = archive.compression -+ self.compress_level = archive.compresslevel -+ if self.filename.endswith('/'): # pragma: no cover -+ self.external_attr = 0o40775 << 16 # drwxrwxr-x -+ self.external_attr |= 0x10 # MS-DOS directory flag -+ else: -+ self.external_attr = 0o600 << 16 # ?rw------- -+ return self -+ - - if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_zipimport_support.py b/Lib/test/test_zipimport_support.py index 88561017503..16d26bea7dc 100644 --- a/Lib/test/test_zipimport_support.py @@ -5994,59 +3709,6 @@ index 88561017503..16d26bea7dc 100644 def test_doctest_issue4197(self): # To avoid having to keep two copies of the doctest module's # unit tests in sync, this test works by taking the source of -diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py -index 02509cdf553..aa7943fc28a 100644 ---- a/Lib/test/test_zlib.py -+++ b/Lib/test/test_zlib.py -@@ -16,6 +16,19 @@ - hasattr(zlib.decompressobj(), "copy"), - 'requires Decompress.copy()') - -+def _zlib_runtime_version_tuple(zlib_version=zlib.ZLIB_RUNTIME_VERSION): -+ # Register "1.2.3" as "1.2.3.0" -+ # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux" -+ v = zlib_version.split('-', 1)[0].split('.') -+ if len(v) < 4: -+ v.append('0') -+ elif not v[-1].isnumeric(): -+ v[-1] = '0' -+ return tuple(map(int, v)) -+ -+ -+ZLIB_RUNTIME_VERSION_TUPLE = _zlib_runtime_version_tuple() -+ - - class VersionTestCase(unittest.TestCase): - -@@ -437,9 +450,8 @@ - sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH', - 'Z_PARTIAL_FLUSH'] - -- ver = tuple(int(v) for v in zlib.ZLIB_RUNTIME_VERSION.split('.')) - # Z_BLOCK has a known failure prior to 1.2.5.3 -- if ver >= (1, 2, 5, 3): -+ if ZLIB_RUNTIME_VERSION_TUPLE >= (1, 2, 5, 3): - sync_opt.append('Z_BLOCK') - - sync_opt = [getattr(zlib, opt) for opt in sync_opt -@@ -768,16 +780,7 @@ - - def test_wbits(self): - # wbits=0 only supported since zlib v1.2.3.5 -- # Register "1.2.3" as "1.2.3.0" -- # or "1.2.0-linux","1.2.0.f","1.2.0.f-linux" -- v = zlib.ZLIB_RUNTIME_VERSION.split('-', 1)[0].split('.') -- if len(v) < 4: -- v.append('0') -- elif not v[-1].isnumeric(): -- v[-1] = '0' -- -- v = tuple(map(int, v)) -- supports_wbits_0 = v >= (1, 2, 3, 5) -+ supports_wbits_0 = ZLIB_RUNTIME_VERSION_TUPLE >= (1, 2, 3, 5) - - co = zlib.compressobj(level=1, wbits=15) - zlib15 = co.compress(HAMLET_SCENE) + co.flush() diff --git a/Lib/unittest/test/test_program.py b/Lib/unittest/test/test_program.py index b7fbbc1e7ba..a544819d516 100644 --- a/Lib/unittest/test/test_program.py @@ -6085,19 +3747,6 @@ index 453e6c3d11c..979eccffaea 100644 def test_warnings(self): """ Check that warnings argument of TextTestRunner correctly affects the -diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py -index 5b7193f67c6..4a24f7eb5e4 100644 ---- a/Lib/urllib/parse.py -+++ b/Lib/urllib/parse.py -@@ -522,7 +522,7 @@ - empty query; the RFC states that these are equivalent).""" - scheme, netloc, url, query, fragment, _coerce_result = ( - _coerce_args(*components)) -- if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): -+ if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//': - if url and url[:1] != '/': url = '/' + url - url = '//' + (netloc or '') + url - if scheme: diff --git a/Lib/webbrowser.py b/Lib/webbrowser.py index 6023c1e1384..9344f600e9a 100755 --- a/Lib/webbrowser.py @@ -6183,35 +3832,6 @@ index 6023c1e1384..9344f600e9a 100755 def main(): import getopt -diff --git a/Lib/zipfile.py b/Lib/zipfile.py -index 95f95ee1126..68d643ddbdd 100644 ---- a/Lib/zipfile.py -+++ b/Lib/zipfile.py -@@ -2146,7 +2146,7 @@ - def _ancestry(path): - """ - Given a path with elements separated by -- posixpath.sep, generate all elements of that path -+ posixpath.sep, generate all elements of that path. - - >>> list(_ancestry('b/d')) - ['b/d', 'b'] -@@ -2158,9 +2158,14 @@ - ['b'] - >>> list(_ancestry('')) - [] -+ -+ Multiple separators are treated like a single. -+ -+ >>> list(_ancestry('//b//d///f//')) -+ ['//b//d///f', '//b//d', '//b'] - """ - path = path.rstrip(posixpath.sep) -- while path and path != posixpath.sep: -+ while path.rstrip(posixpath.sep): - yield path - path, tail = posixpath.split(path) - --- /dev/null +++ b/Mac/Resources/app-store-compliance.patch @@ -0,0 +1 @@ @@ -6511,477 +4131,6 @@ index a276d535c7f..4da3462959e 100644 .PHONY: frameworkinstallmaclib frameworkinstallapps frameworkinstallunixtools .PHONY: frameworkaltinstallunixtools recheck clean clobber distclean .PHONY: smelly funny patchcheck touch altmaninstall commoninstall -diff --git a/Modules/_ssl.c b/Modules/_ssl.c -index 3f95d3e1037..5e0be34d6f3 100644 ---- a/Modules/_ssl.c -+++ b/Modules/_ssl.c -@@ -166,6 +166,10 @@ - # define PY_OPENSSL_1_1_API 1 - #endif - -+#if (OPENSSL_VERSION_NUMBER >= 0x30300000L) && !defined(LIBRESSL_VERSION_NUMBER) -+# define OPENSSL_VERSION_3_3 1 -+#endif -+ - /* SNI support (client- and server-side) appeared in OpenSSL 1.0.0 and 0.9.8f - * This includes the SSL_set_SSL_CTX() function. - */ -@@ -210,6 +214,16 @@ - #define HAVE_OPENSSL_CRYPTO_LOCK - #endif - -+/* OpenSSL 1.1+ allows locking X509_STORE, 1.0.2 doesn't. */ -+#ifdef OPENSSL_VERSION_1_1 -+#define HAVE_OPENSSL_X509_STORE_LOCK -+#endif -+ -+/* OpenSSL 3.3 added the X509_STORE_get1_objects API */ -+#ifdef OPENSSL_VERSION_3_3 -+#define HAVE_OPENSSL_X509_STORE_GET1_OBJECTS 1 -+#endif -+ - #if defined(OPENSSL_VERSION_1_1) && !defined(OPENSSL_NO_SSL2) - #define OPENSSL_NO_SSL2 - #endif -@@ -4675,6 +4689,54 @@ - #endif - } - -+/* Shim of X509_STORE_get1_objects API from OpenSSL 3.3 -+ * Only available with the X509_STORE_lock() API */ -+#if defined(HAVE_OPENSSL_X509_STORE_LOCK) && !defined(OPENSSL_VERSION_3_3) -+#define HAVE_OPENSSL_X509_STORE_GET1_OBJECTS 1 -+ -+static X509_OBJECT *x509_object_dup(const X509_OBJECT *obj) -+{ -+ int ok; -+ X509_OBJECT *ret = X509_OBJECT_new(); -+ if (ret == NULL) { -+ return NULL; -+ } -+ switch (X509_OBJECT_get_type(obj)) { -+ case X509_LU_X509: -+ ok = X509_OBJECT_set1_X509(ret, X509_OBJECT_get0_X509(obj)); -+ break; -+ case X509_LU_CRL: -+ /* X509_OBJECT_get0_X509_CRL was not const-correct prior to 3.0.*/ -+ ok = X509_OBJECT_set1_X509_CRL( -+ ret, X509_OBJECT_get0_X509_CRL((X509_OBJECT *)obj)); -+ break; -+ default: -+ /* We cannot duplicate unrecognized types in a polyfill, but it is -+ * safe to leave an empty object. The caller will ignore it. */ -+ ok = 1; -+ break; -+ } -+ if (!ok) { -+ X509_OBJECT_free(ret); -+ return NULL; -+ } -+ return ret; -+} -+ -+static STACK_OF(X509_OBJECT) * -+X509_STORE_get1_objects(X509_STORE *store) -+{ -+ STACK_OF(X509_OBJECT) *ret; -+ if (!X509_STORE_lock(store)) { -+ return NULL; -+ } -+ ret = sk_X509_OBJECT_deep_copy(X509_STORE_get0_objects(store), -+ x509_object_dup, X509_OBJECT_free); -+ X509_STORE_unlock(store); -+ return ret; -+} -+#endif -+ - PyDoc_STRVAR(PySSLContext_sni_callback_doc, - "Set a callback that will be called when a server name is provided by the SSL/TLS client in the SNI extension.\n\ - \n\ -@@ -4704,7 +4766,15 @@ - int x509 = 0, crl = 0, ca = 0, i; - - store = SSL_CTX_get_cert_store(self->ctx); -+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS -+ objs = X509_STORE_get1_objects(store); -+ if (objs == NULL) { -+ PyErr_SetString(PyExc_MemoryError, "failed to query cert store"); -+ return NULL; -+ } -+#else - objs = X509_STORE_get0_objects(store); -+#endif - for (i = 0; i < sk_X509_OBJECT_num(objs); i++) { - obj = sk_X509_OBJECT_value(objs, i); - switch (X509_OBJECT_get_type(obj)) { -@@ -4718,12 +4788,13 @@ - crl++; - break; - default: -- /* Ignore X509_LU_FAIL, X509_LU_RETRY, X509_LU_PKEY. -- * As far as I can tell they are internal states and never -- * stored in a cert store */ -+ /* Ignore unrecognized types. */ - break; - } - } -+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS -+ sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free); -+#endif - return Py_BuildValue("{sisisi}", "x509", x509, "crl", crl, - "x509_ca", ca); - } -@@ -4755,7 +4826,15 @@ - } - - store = SSL_CTX_get_cert_store(self->ctx); -+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS -+ objs = X509_STORE_get1_objects(store); -+ if (objs == NULL) { -+ PyErr_SetString(PyExc_MemoryError, "failed to query cert store"); -+ return NULL; -+ } -+#else - objs = X509_STORE_get0_objects(store); -+#endif - for (i = 0; i < sk_X509_OBJECT_num(objs); i++) { - X509_OBJECT *obj; - X509 *cert; -@@ -4783,9 +4862,15 @@ - } - Py_CLEAR(ci); - } -+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS -+ sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free); -+#endif - return rlist; - - error: -+#if HAVE_OPENSSL_X509_STORE_GET1_OBJECTS -+ sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free); -+#endif - Py_XDECREF(ci); - Py_XDECREF(rlist); - return NULL; -diff --git a/Modules/_winapi.c b/Modules/_winapi.c -index 1e0e4ec8b3d..ffcdafa48c0 100644 ---- a/Modules/_winapi.c -+++ b/Modules/_winapi.c -@@ -461,7 +461,7 @@ - { - HANDLE handle; - -- if (PySys_Audit("_winapi.CreateFile", "uIIII", -+ if (PySys_Audit("_winapi.CreateFile", "sIIII", - file_name, desired_access, share_mode, - creation_disposition, flags_and_attributes) < 0) { - return INVALID_HANDLE_VALUE; -@@ -681,7 +681,7 @@ - { - HANDLE handle; - -- if (PySys_Audit("_winapi.CreateNamedPipe", "uII", -+ if (PySys_Audit("_winapi.CreateNamedPipe", "sII", - name, open_mode, pipe_mode) < 0) { - return INVALID_HANDLE_VALUE; - } -diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h -index 95464b0dd17..d0d6015a662 100644 ---- a/Modules/expat/expat.h -+++ b/Modules/expat/expat.h -@@ -18,6 +18,7 @@ - Copyright (c) 2022 Thijs Schreijer - Copyright (c) 2023 Hanno Böck - Copyright (c) 2023 Sony Corporation / Snild Dolkow -+ Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> - Licensed under the MIT license: - - Permission is hereby granted, free of charge, to any person obtaining -@@ -1042,7 +1043,7 @@ - XMLPARSEAPI(const XML_Feature *) - XML_GetFeatureList(void); - --#if XML_GE == 1 -+#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) - /* Added in Expat 2.4.0 for XML_DTD defined and - * added in Expat 2.6.0 for XML_GE == 1. */ - XMLPARSEAPI(XML_Bool) -@@ -1065,7 +1066,7 @@ - */ - #define XML_MAJOR_VERSION 2 - #define XML_MINOR_VERSION 6 --#define XML_MICRO_VERSION 0 -+#define XML_MICRO_VERSION 3 - - #ifdef __cplusplus - } -diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h -index cce71e4c516..167ec36804a 100644 ---- a/Modules/expat/internal.h -+++ b/Modules/expat/internal.h -@@ -28,10 +28,11 @@ - Copyright (c) 2002-2003 Fred L. Drake, Jr. - Copyright (c) 2002-2006 Karl Waclawek - Copyright (c) 2003 Greg Stein -- Copyright (c) 2016-2023 Sebastian Pipping -+ Copyright (c) 2016-2024 Sebastian Pipping - Copyright (c) 2018 Yury Gribov - Copyright (c) 2019 David Loffredo -- Copyright (c) 2023 Sony Corporation / Snild Dolkow -+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow -+ Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> - Licensed under the MIT license: - - Permission is hereby granted, free of charge, to any person obtaining -@@ -155,14 +156,20 @@ - void _INTERNAL_trim_to_complete_utf8_characters(const char *from, - const char **fromLimRef); - --#if XML_GE == 1 -+#if defined(XML_GE) && XML_GE == 1 - unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); - unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); - const char *unsignedCharToPrintable(unsigned char c); - #endif - --extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c --extern unsigned int g_parseAttempts; // used for testing only -+extern -+#if ! defined(XML_TESTING) -+ const -+#endif -+ XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c -+#if defined(XML_TESTING) -+extern unsigned int g_bytesScanned; // used for testing only -+#endif - - #ifdef __cplusplus - } -diff --git a/Modules/expat/siphash.h b/Modules/expat/siphash.h -index a1ed99e687b..04f6f74585b 100644 ---- a/Modules/expat/siphash.h -+++ b/Modules/expat/siphash.h -@@ -126,8 +126,7 @@ - | ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) \ - | ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) - --#define SIPHASH_INITIALIZER \ -- { 0, 0, 0, 0, {0}, 0, 0 } -+#define SIPHASH_INITIALIZER {0, 0, 0, 0, {0}, 0, 0} - - struct siphash { - uint64_t v0, v1, v2, v3; -diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c -index aaf0fa9c8f9..d9285b213b3 100644 ---- a/Modules/expat/xmlparse.c -+++ b/Modules/expat/xmlparse.c -@@ -1,4 +1,4 @@ --/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+) -+/* ba4cdf9bdb534f355a9def4c9e25d20ee8e72f95b0a4d930be52e563f5080196 (2.6.3+) - __ __ _ - ___\ \/ /_ __ __ _| |_ - / _ \\ /| '_ \ / _` | __| -@@ -38,7 +38,8 @@ - Copyright (c) 2022 Jann Horn - Copyright (c) 2022 Sean McBride - Copyright (c) 2023 Owain Davies -- Copyright (c) 2023 Sony Corporation / Snild Dolkow -+ Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow -+ Copyright (c) 2024 Berkay Eren Ürün - Licensed under the MIT license: - - Permission is hereby granted, free of charge, to any person obtaining -@@ -210,7 +211,7 @@ - #endif - - /* Round up n to be a multiple of sz, where sz is a power of 2. */ --#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) -+#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) - - /* Do safe (NULL-aware) pointer arithmetic */ - #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) -@@ -248,7 +249,7 @@ - it odd, since odd numbers are always relative prime to a power of 2. - */ - #define SECOND_HASH(hash, mask, power) \ -- ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) -+ ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) - #define PROBE_STEP(hash, mask, power) \ - ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) - -@@ -294,7 +295,7 @@ - The name of the element is stored in both the document and API - encodings. The memory buffer 'buf' is a separately-allocated - memory area which stores the name. During the XML_Parse()/ -- XMLParseBuffer() when the element is open, the memory for the 'raw' -+ XML_ParseBuffer() when the element is open, the memory for the 'raw' - version of the name (in the document encoding) is shared with the - document buffer. If the element is open across calls to - XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to -@@ -629,8 +630,14 @@ - ? 0 \ - : ((*((pool)->ptr)++ = c), 1)) - --XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c --unsigned int g_parseAttempts = 0; // used for testing only -+#if ! defined(XML_TESTING) -+const -+#endif -+ XML_Bool g_reparseDeferralEnabledDefault -+ = XML_TRUE; // write ONLY in runtests.c -+#if defined(XML_TESTING) -+unsigned int g_bytesScanned = 0; // used for testing only -+#endif - - struct XML_ParserStruct { - /* The first member must be m_userData so that the XML_GetUserData -@@ -1017,7 +1024,9 @@ - return XML_ERROR_NONE; - } - } -- g_parseAttempts += 1; -+#if defined(XML_TESTING) -+ g_bytesScanned += (unsigned)have_now; -+#endif - const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); - if (ret == XML_ERROR_NONE) { - // if we consumed nothing, remember what we had on this parse attempt. -@@ -2030,6 +2039,12 @@ - - if (parser == NULL) - return XML_STATUS_ERROR; -+ -+ if (len < 0) { -+ parser->m_errorCode = XML_ERROR_INVALID_ARGUMENT; -+ return XML_STATUS_ERROR; -+ } -+ - switch (parser->m_parsingStatus.parsing) { - case XML_SUSPENDED: - parser->m_errorCode = XML_ERROR_SUSPENDED; -@@ -5838,18 +5853,17 @@ - /* Set a safe default value in case 'next' does not get set */ - next = textStart; - --#ifdef XML_DTD - if (entity->is_param) { - int tok - = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_FALSE, - XML_ACCOUNT_ENTITY_EXPANSION); -- } else --#endif /* XML_DTD */ -+ } else { - result = doContent(parser, parser->m_tagLevel, parser->m_internalEncoding, - textStart, textEnd, &next, XML_FALSE, - XML_ACCOUNT_ENTITY_EXPANSION); -+ } - - if (result == XML_ERROR_NONE) { - if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { -@@ -5886,18 +5900,17 @@ - /* Set a safe default value in case 'next' does not get set */ - next = textStart; - --#ifdef XML_DTD - if (entity->is_param) { - int tok - = XmlPrologTok(parser->m_internalEncoding, textStart, textEnd, &next); - result = doProlog(parser, parser->m_internalEncoding, textStart, textEnd, - tok, next, &next, XML_FALSE, XML_TRUE, - XML_ACCOUNT_ENTITY_EXPANSION); -- } else --#endif /* XML_DTD */ -+ } else { - result = doContent(parser, openEntity->startTagLevel, - parser->m_internalEncoding, textStart, textEnd, &next, - XML_FALSE, XML_ACCOUNT_ENTITY_EXPANSION); -+ } - - if (result != XML_ERROR_NONE) - return result; -@@ -5924,7 +5937,6 @@ - return XML_ERROR_NONE; - } - --#ifdef XML_DTD - if (entity->is_param) { - int tok; - parser->m_processor = prologProcessor; -@@ -5932,9 +5944,7 @@ - return doProlog(parser, parser->m_encoding, s, end, tok, next, nextPtr, - (XML_Bool)! parser->m_parsingStatus.finalBuffer, XML_TRUE, - XML_ACCOUNT_DIRECT); -- } else --#endif /* XML_DTD */ -- { -+ } else { - parser->m_processor = contentProcessor; - /* see externalEntityContentProcessor vs contentProcessor */ - result = doContent(parser, parser->m_parentParser ? 1 : 0, -@@ -6232,7 +6242,7 @@ - dtd->keepProcessing = dtd->standalone; - goto endEntityValue; - } -- if (entity->open) { -+ if (entity->open || (entity == parser->m_declEntity)) { - if (enc == parser->m_encoding) - parser->m_eventPtr = entityTextPtr; - result = XML_ERROR_RECURSIVE_ENTITY_REF; -@@ -7008,6 +7018,16 @@ - if (! newE) - return 0; - if (oldE->nDefaultAtts) { -+ /* Detect and prevent integer overflow. -+ * The preprocessor guard addresses the "always false" warning -+ * from -Wtype-limits on platforms where -+ * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ -+#if UINT_MAX >= SIZE_MAX -+ if ((size_t)oldE->nDefaultAtts -+ > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) { -+ return 0; -+ } -+#endif - newE->defaultAtts - = ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); - if (! newE->defaultAtts) { -@@ -7550,6 +7570,15 @@ - int next; - - if (! dtd->scaffIndex) { -+ /* Detect and prevent integer overflow. -+ * The preprocessor guard addresses the "always false" warning -+ * from -Wtype-limits on platforms where -+ * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ -+#if UINT_MAX >= SIZE_MAX -+ if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) { -+ return -1; -+ } -+#endif - dtd->scaffIndex = (int *)MALLOC(parser, parser->m_groupSize * sizeof(int)); - if (! dtd->scaffIndex) - return -1; -@@ -7779,6 +7808,8 @@ - - static float - accountingGetCurrentAmplification(XML_Parser rootParser) { -+ // 1.........1.........12 => 22 -+ const size_t lenOfShortestInclude = sizeof("") - 1; - const XmlBigCount countBytesOutput - = rootParser->m_accounting.countBytesDirect - + rootParser->m_accounting.countBytesIndirect; -@@ -7786,7 +7817,9 @@ - = rootParser->m_accounting.countBytesDirect - ? (countBytesOutput - / (float)(rootParser->m_accounting.countBytesDirect)) -- : 1.0f; -+ : ((lenOfShortestInclude -+ + rootParser->m_accounting.countBytesIndirect) -+ / (float)lenOfShortestInclude); - assert(! rootParser->m_parentParser); - return amplificationFactor; - } diff --git a/Modules/getpath.c b/Modules/getpath.c index 5dbe57c9509..6deb125b39f 100644 --- a/Modules/getpath.c @@ -7012,68 +4161,11 @@ index 5dbe57c9509..6deb125b39f 100644 status = calculate_argv0_path_framework(calculate, pathconfig); if (_PyStatus_EXCEPTION(status)) { return status; -diff --git a/Modules/main.c b/Modules/main.c -index 2cc891f61aa..903de3f13f3 100644 ---- a/Modules/main.c -+++ b/Modules/main.c -@@ -534,6 +534,10 @@ - return; - } - -+ if (PySys_Audit("cpython.run_stdin", NULL) < 0) { -+ return; -+ } -+ - int res = PyRun_AnyFileFlags(stdin, "", cf); - *exitcode = (res != 0); - } -diff --git a/Modules/overlapped.c b/Modules/overlapped.c -index cd7869fa8aa..5f8a8234735 100644 ---- a/Modules/overlapped.c -+++ b/Modules/overlapped.c -@@ -624,6 +624,24 @@ - if (!HasOverlappedIoCompleted(&self->overlapped) && - self->type != TYPE_NOT_STARTED) - { -+ // NOTE: We should not get here, if we do then something is wrong in -+ // the IocpProactor or ProactorEventLoop. Since everything uses IOCP if -+ // the overlapped IO hasn't completed yet then we should not be -+ // deallocating! -+ // -+ // The problem is likely that this OverlappedObject was removed from -+ // the IocpProactor._cache before it was complete. The _cache holds a -+ // reference while IO is pending so that it does not get deallocated -+ // while the kernel has retained the OVERLAPPED structure. -+ // -+ // CancelIoEx (likely called from self.cancel()) may have successfully -+ // completed, but the OVERLAPPED is still in use until either -+ // HasOverlappedIoCompleted() is true or GetQueuedCompletionStatus has -+ // returned this OVERLAPPED object. -+ // -+ // NOTE: Waiting when IOCP is in use can hang indefinitely, but this -+ // CancelIoEx is superfluous in that self.cancel() was already called, -+ // so I've only ever seen this return FALSE with GLE=ERROR_NOT_FOUND - if (Py_CancelIoEx && Py_CancelIoEx(self->handle, &self->overlapped)) - wait = TRUE; - diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c -index bf4e648421d..ab5b6b82344 100644 +index f498d40e34c..ab5b6b82344 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c -@@ -24,6 +24,12 @@ - #include "pycore_ceval.h" // _PyEval_ReInitThreads() - #include "pycore_import.h" // _PyImport_ReInitLock() - #include "pycore_pystate.h" // _PyInterpreterState_GET() -+ -+#ifdef MS_WINDOWS -+# include // SetEntriesInAcl -+# include // SDDL_REVISION_1 -+#endif -+ - #include "structmember.h" // PyMemberDef - #ifndef MS_WINDOWS - # include "posixmodule.h" -@@ -317,8 +323,6 @@ +@@ -323,8 +323,6 @@ # else /* Unix functions that the configure script doesn't check for */ # ifndef __VXWORKS__ @@ -7082,7 +4174,7 @@ index bf4e648421d..ab5b6b82344 100644 # if defined(__USLC__) && defined(__SCO_VERSION__) /* SCO UDK Compiler */ # define HAVE_FORK1 1 # endif -@@ -331,7 +335,6 @@ +@@ -337,7 +335,6 @@ # define HAVE_KILL 1 # define HAVE_OPENDIR 1 # define HAVE_PIPE 1 @@ -7090,80 +4182,6 @@ index bf4e648421d..ab5b6b82344 100644 # define HAVE_WAIT 1 # define HAVE_TTYNAME 1 # endif /* _MSC_VER */ -@@ -583,10 +586,10 @@ - { - _PyRuntimeState *runtime = &_PyRuntime; - _PyGILState_Reinit(runtime); -+ _PyRuntimeState_ReInitThreads(runtime); - _PyEval_ReInitThreads(runtime); - _PyImport_ReInitLock(); - _PySignal_AfterFork(); -- _PyRuntimeState_ReInitThreads(runtime); - _PyInterpreterState_DeleteExceptMain(runtime); - - run_at_forkers(_PyInterpreterState_GET()->after_forkers_child, 0); -@@ -4425,7 +4428,6 @@ - - #endif /* MS_WINDOWS */ - -- - /*[clinic input] - os.mkdir - -@@ -4454,6 +4456,12 @@ - /*[clinic end generated code: output=a70446903abe821f input=e965f68377e9b1ce]*/ - { - int result; -+#ifdef MS_WINDOWS -+ int error = 0; -+ int pathError = 0; -+ SECURITY_ATTRIBUTES secAttr = { sizeof(secAttr) }; -+ SECURITY_ATTRIBUTES *pSecAttr = NULL; -+#endif - #ifdef HAVE_MKDIRAT - int mkdirat_unavailable = 0; - #endif -@@ -4465,11 +4473,38 @@ - - #ifdef MS_WINDOWS - Py_BEGIN_ALLOW_THREADS -- result = CreateDirectoryW(path->wide, NULL); -+ if (mode == 0700 /* 0o700 */) { -+ ULONG sdSize; -+ pSecAttr = &secAttr; -+ // Set a discretionary ACL (D) that is protected (P) and includes -+ // inheritable (OICI) entries that allow (A) full control (FA) to -+ // SYSTEM (SY), Administrators (BA), and the owner (OW). -+ if (!ConvertStringSecurityDescriptorToSecurityDescriptorW( -+ L"D:P(A;OICI;FA;;;SY)(A;OICI;FA;;;BA)(A;OICI;FA;;;OW)", -+ SDDL_REVISION_1, -+ &secAttr.lpSecurityDescriptor, -+ &sdSize -+ )) { -+ error = GetLastError(); -+ } -+ } -+ if (!error) { -+ result = CreateDirectoryW(path->wide, pSecAttr); -+ if (secAttr.lpSecurityDescriptor && -+ // uncommonly, LocalFree returns non-zero on error, but still uses -+ // GetLastError() to see what the error code is -+ LocalFree(secAttr.lpSecurityDescriptor)) { -+ error = GetLastError(); -+ } -+ } - Py_END_ALLOW_THREADS - -- if (!result) -+ if (error) { -+ return PyErr_SetFromWindowsErr(error); -+ } -+ if (!result) { - return path_error(path); -+ } - #else - Py_BEGIN_ALLOW_THREADS - #if HAVE_MKDIRAT diff --git a/Python/importlib_external.h b/Python/importlib_external.h index 23af2cd20b9..27ff2fa3f79 100644 --- a/Python/importlib_external.h @@ -12776,16 +9794,6 @@ index 651168da9fc..59b2694e309 100644 #endif #define TYPE_NULL '0' -diff --git a/README.rst b/README.rst -index 592b98880af..abe33630048 100644 ---- a/README.rst -+++ b/README.rst -@@ -1,4 +1,4 @@ --This is Python version 3.9.19 -+This is Python version 3.9.20 - ============================= - - .. image:: https://travis-ci.org/python/cpython.svg?branch=3.9 diff --git a/config.sub b/config.sub index d74fb6deac9..1bb6a05dc11 100755 --- a/config.sub