From 7146f1894638130940944d4808dae7d144d46227 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 12 Dec 2024 17:39:24 +0000 Subject: [PATCH 01/19] GH-127807: pathlib ABCs: remove `PathBase._unsupported_msg()` (#127855) This method helped us customise the `UnsupportedOperation` message depending on the type. But we're aiming to make `PathBase` a proper ABC soon, so `NotImplementedError` is the right exception to raise there. --- Lib/pathlib/__init__.py | 4 +-- Lib/pathlib/_abc.py | 35 +++++++-------------- Lib/pathlib/_local.py | 37 ++++++++++++++++++----- Lib/test/test_pathlib/test_pathlib.py | 8 +++++ Lib/test/test_pathlib/test_pathlib_abc.py | 12 ++------ 5 files changed, 52 insertions(+), 44 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 5da3acd31997e5..ec1bac9ef49350 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -5,8 +5,6 @@ operating systems. """ -from pathlib._abc import * from pathlib._local import * -__all__ = (_abc.__all__ + - _local.__all__) +__all__ = _local.__all__ diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b10aba85132332..b4560295300c28 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -20,15 +20,6 @@ from pathlib._os import copyfileobj -__all__ = ["UnsupportedOperation"] - - -class UnsupportedOperation(NotImplementedError): - """An exception that is raised when an unsupported operation is attempted. - """ - pass - - @functools.cache def _is_case_sensitive(parser): return parser.normcase('Aa') == 'Aa' @@ -353,8 +344,8 @@ class PathBase(PurePathBase): This class provides dummy implementations for many methods that derived classes can override selectively; the default implementations raise - UnsupportedOperation. The most basic methods, such as stat() and open(), - directly raise UnsupportedOperation; these basic methods are called by + NotImplementedError. The most basic methods, such as stat() and open(), + directly raise NotImplementedError; these basic methods are called by other methods such as is_dir() and read_text(). The Path class derives this class to implement local filesystem paths. @@ -363,16 +354,12 @@ class PathBase(PurePathBase): """ __slots__ = () - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported" - def stat(self, *, follow_symlinks=True): """ Return the result of the stat() system call on this path, like os.stat() does. """ - raise UnsupportedOperation(self._unsupported_msg('stat()')) + raise NotImplementedError # Convenience functions for querying the stat results @@ -448,7 +435,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed to by this path and return a file object, as the built-in open() function does. """ - raise UnsupportedOperation(self._unsupported_msg('open()')) + raise NotImplementedError def read_bytes(self): """ @@ -498,7 +485,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - raise UnsupportedOperation(self._unsupported_msg('iterdir()')) + raise NotImplementedError def _glob_selector(self, parts, case_sensitive, recurse_symlinks): if case_sensitive is None: @@ -575,14 +562,14 @@ def readlink(self): """ Return the path to which the symbolic link points. """ - raise UnsupportedOperation(self._unsupported_msg('readlink()')) + raise NotImplementedError def symlink_to(self, target, target_is_directory=False): """ Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - raise UnsupportedOperation(self._unsupported_msg('symlink_to()')) + raise NotImplementedError def _symlink_to_target_of(self, link): """ @@ -595,7 +582,7 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ - raise UnsupportedOperation(self._unsupported_msg('mkdir()')) + raise NotImplementedError # Metadata keys supported by this path type. _readable_metadata = _writable_metadata = frozenset() @@ -604,13 +591,13 @@ def _read_metadata(self, keys=None, *, follow_symlinks=True): """ Returns path metadata as a dict with string keys. """ - raise UnsupportedOperation(self._unsupported_msg('_read_metadata()')) + raise NotImplementedError def _write_metadata(self, metadata, *, follow_symlinks=True): """ Sets path metadata from the given dict with string keys. """ - raise UnsupportedOperation(self._unsupported_msg('_write_metadata()')) + raise NotImplementedError def _copy_metadata(self, target, *, follow_symlinks=True): """ @@ -687,7 +674,7 @@ def _delete(self): """ Delete this file or directory (including all sub-directories). """ - raise UnsupportedOperation(self._unsupported_msg('_delete()')) + raise NotImplementedError def move(self, target): """ diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 0dfe9d2390ecff..b933dd512eeb28 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -21,15 +21,22 @@ from pathlib._os import (copyfile, file_metadata_keys, read_file_metadata, write_file_metadata) -from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._abc import PurePathBase, PathBase __all__ = [ + "UnsupportedOperation", "PurePath", "PurePosixPath", "PureWindowsPath", "Path", "PosixPath", "WindowsPath", ] +class UnsupportedOperation(NotImplementedError): + """An exception that is raised when an unsupported operation is attempted. + """ + pass + + class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" @@ -527,10 +534,6 @@ class Path(PathBase, PurePath): """ __slots__ = () - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported on this system" - def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath @@ -817,7 +820,8 @@ def owner(self, *, follow_symlinks=True): """ Return the login name of the file owner. """ - raise UnsupportedOperation(self._unsupported_msg('owner()')) + f = f"{type(self).__name__}.owner()" + raise UnsupportedOperation(f"{f} is unsupported on this system") if grp: def group(self, *, follow_symlinks=True): @@ -831,7 +835,8 @@ def group(self, *, follow_symlinks=True): """ Return the group name of the file gid. """ - raise UnsupportedOperation(self._unsupported_msg('group()')) + f = f"{type(self).__name__}.group()" + raise UnsupportedOperation(f"{f} is unsupported on this system") if hasattr(os, "readlink"): def readlink(self): @@ -839,6 +844,13 @@ def readlink(self): Return the path to which the symbolic link points. """ return self.with_segments(os.readlink(self)) + else: + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + f = f"{type(self).__name__}.readlink()" + raise UnsupportedOperation(f"{f} is unsupported on this system") def touch(self, mode=0o666, exist_ok=True): """ @@ -989,6 +1001,14 @@ def symlink_to(self, target, target_is_directory=False): Note the order of arguments (link, target) is the reverse of os.symlink. """ os.symlink(target, self, target_is_directory) + else: + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + f = f"{type(self).__name__}.symlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") if os.name == 'nt': def _symlink_to_target_of(self, link): @@ -1013,7 +1033,8 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ - raise UnsupportedOperation(self._unsupported_msg('hardlink_to()')) + f = f"{type(self).__name__}.hardlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") def expanduser(self): """ Return a new path with expanded ~ and ~user constructs diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b57ef420bfcbcd..68bff2cf0d511e 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -63,6 +63,14 @@ def needs_symlinks(fn): _tests_needing_symlinks.add(fn.__name__) return fn + + +class UnsupportedOperationTest(unittest.TestCase): + def test_is_notimplemented(self): + self.assertTrue(issubclass(pathlib.UnsupportedOperation, NotImplementedError)) + self.assertTrue(isinstance(pathlib.UnsupportedOperation(), NotImplementedError)) + + # # Tests for the pure classes. # diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index d770b87dc6a104..e230dd188799a5 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -5,7 +5,7 @@ import stat import unittest -from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._abc import PurePathBase, PathBase from pathlib._types import Parser import posixpath @@ -27,11 +27,6 @@ def needs_windows(fn): return fn -class UnsupportedOperationTest(unittest.TestCase): - def test_is_notimplemented(self): - self.assertTrue(issubclass(UnsupportedOperation, NotImplementedError)) - self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError)) - # # Tests for the pure classes. # @@ -1294,10 +1289,9 @@ def test_is_absolute_windows(self): class PathBaseTest(PurePathBaseTest): cls = PathBase - def test_unsupported_operation(self): - P = self.cls + def test_not_implemented_error(self): p = self.cls('') - e = UnsupportedOperation + e = NotImplementedError self.assertRaises(e, p.stat) self.assertRaises(e, p.exists) self.assertRaises(e, p.is_dir) From f8dcb8200626a1a06c4a26d8129257f42658a9ff Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 12 Dec 2024 17:59:13 +0000 Subject: [PATCH 02/19] gh-127879: Fix data race in `_PyFreeList_Push` (#127880) Writes to the `ob_tid` field need to use atomics because it may be concurrently read by a non-locking dictionary, list, or structmember read. --- Include/internal/pycore_freelist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index da2d7bf6ae1393..84a5ab30f3eeea 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -51,7 +51,7 @@ static inline int _PyFreeList_Push(struct _Py_freelist *fl, void *obj, Py_ssize_t maxsize) { if (fl->size < maxsize && fl->size >= 0) { - *(void **)obj = fl->freelist; + FT_ATOMIC_STORE_PTR_RELAXED(*(void **)obj, fl->freelist); fl->freelist = obj; fl->size++; OBJECT_STAT_INC(to_freelist); From f823910bbd4bf01ec3e1ab7b3cb1d77815138296 Mon Sep 17 00:00:00 2001 From: velemas <10437413+velemas@users.noreply.github.com> Date: Thu, 12 Dec 2024 20:07:55 +0200 Subject: [PATCH 03/19] gh-127865: Fix build failure for systems without thread local support (GH-127866) This PR fixes the build issue introduced by the commit 628f6eb from GH-112207 on systems without thread local support. --- .../Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst | 1 + Python/import.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst diff --git a/Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst b/Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst new file mode 100644 index 00000000000000..3fc1d8a1b51d30 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst @@ -0,0 +1 @@ +Fix build failure on systems without thread-locals support. diff --git a/Python/import.c b/Python/import.c index b3c384c27718ce..f3511aaf7b8010 100644 --- a/Python/import.c +++ b/Python/import.c @@ -749,7 +749,7 @@ const char * _PyImport_ResolveNameWithPackageContext(const char *name) { #ifndef HAVE_THREAD_LOCAL - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); + PyMutex_Lock(&EXTENSIONS.mutex); #endif if (PKGCONTEXT != NULL) { const char *p = strrchr(PKGCONTEXT, '.'); @@ -759,7 +759,7 @@ _PyImport_ResolveNameWithPackageContext(const char *name) } } #ifndef HAVE_THREAD_LOCAL - PyThread_release_lock(EXTENSIONS.mutex); + PyMutex_Unlock(&EXTENSIONS.mutex); #endif return name; } @@ -768,12 +768,12 @@ const char * _PyImport_SwapPackageContext(const char *newcontext) { #ifndef HAVE_THREAD_LOCAL - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); + PyMutex_Lock(&EXTENSIONS.mutex); #endif const char *oldcontext = PKGCONTEXT; PKGCONTEXT = newcontext; #ifndef HAVE_THREAD_LOCAL - PyThread_release_lock(EXTENSIONS.mutex); + PyMutex_Unlock(&EXTENSIONS.mutex); #endif return oldcontext; } From 365451e28368db46ae89a3a990d85c10c2284aa2 Mon Sep 17 00:00:00 2001 From: Andrey Efremov Date: Fri, 13 Dec 2024 03:17:39 +0700 Subject: [PATCH 04/19] gh-127353: Allow to force color output on Windows (#127354) --- Lib/_colorize.py | 17 +++++---- Lib/test/test__colorize.py | 37 +++++++++++++++++++ ...-11-28-15-55-48.gh-issue-127353.i-XOXg.rst | 2 + 3 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 845fb57a90abb8..709081e25ec59b 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -32,14 +32,6 @@ def get_colors(colorize: bool = False) -> ANSIColors: def can_colorize() -> bool: - if sys.platform == "win32": - try: - import nt - - if not nt._supports_virtual_terminal(): - return False - except (ImportError, AttributeError): - return False if not sys.flags.ignore_environment: if os.environ.get("PYTHON_COLORS") == "0": return False @@ -58,6 +50,15 @@ def can_colorize() -> bool: if not hasattr(sys.stderr, "fileno"): return False + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False + try: return os.isatty(sys.stderr.fileno()) except io.UnsupportedOperation: diff --git a/Lib/test/test__colorize.py b/Lib/test/test__colorize.py index d55b97ade68cef..7a65d63f49eed7 100644 --- a/Lib/test/test__colorize.py +++ b/Lib/test/test__colorize.py @@ -50,10 +50,47 @@ def test_colorized_detection_checks_for_environment_variables(self): with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {}): + self.assertEqual(_colorize.can_colorize(), True) + isatty_mock.return_value = False with unittest.mock.patch("os.environ", {}): self.assertEqual(_colorize.can_colorize(), False) + @force_not_colorized + @unittest.skipUnless(sys.platform == "win32", "Windows only") + def test_colorized_detection_checks_for_environment_variables_no_vt(self): + with (unittest.mock.patch("nt._supports_virtual_terminal", return_value=False), + unittest.mock.patch("os.isatty") as isatty_mock, + unittest.mock.patch("sys.flags", unittest.mock.MagicMock(ignore_environment=False)), + unittest.mock.patch("_colorize.can_colorize", ORIGINAL_CAN_COLORIZE)): + isatty_mock.return_value = True + with unittest.mock.patch("os.environ", {'TERM': 'dumb'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '1'}): + self.assertEqual(_colorize.can_colorize(), True) + with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '0'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {'NO_COLOR': '1'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", + {'NO_COLOR': '1', "PYTHON_COLORS": '1'}): + self.assertEqual(_colorize.can_colorize(), True) + with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1'}): + self.assertEqual(_colorize.can_colorize(), True) + with unittest.mock.patch("os.environ", + {'FORCE_COLOR': '1', 'NO_COLOR': '1'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", + {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {}): + self.assertEqual(_colorize.can_colorize(), False) + + isatty_mock.return_value = False + with unittest.mock.patch("os.environ", {}): + self.assertEqual(_colorize.can_colorize(), False) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst b/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst new file mode 100644 index 00000000000000..88661b9a611071 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst @@ -0,0 +1,2 @@ +Allow to force color output on Windows using environment variables. Patch by +Andrey Efremov. From ed037d229f64db90aea00f397e9ce1b2f4a22d3f Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 12 Dec 2024 20:27:29 +0000 Subject: [PATCH 05/19] Fix typos in `Lib/_pydecimal.py` (#127700) --- Lib/_pydecimal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 5b60570c6c592a..ec036199331396 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -97,7 +97,7 @@ class DecimalException(ArithmeticError): Used exceptions derive from this. If an exception derives from another exception besides this (such as - Underflow (Inexact, Rounded, Subnormal) that indicates that it is only + Underflow (Inexact, Rounded, Subnormal)) that indicates that it is only called if the others are present. This isn't actually used for anything, though. @@ -145,7 +145,7 @@ class InvalidOperation(DecimalException): x ** (+-)INF An operand is invalid - The result of the operation after these is a quiet positive NaN, + The result of the operation after this is a quiet positive NaN, except when the cause is a signaling NaN, in which case the result is also a quiet NaN, but with the original sign, and an optional diagnostic information. From a8ffe661548e16ad02dbe6cb8a89513d7ed2a42c Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 12 Dec 2024 23:11:20 +0200 Subject: [PATCH 06/19] Revert "gh-127353: Allow to force color output on Windows (#127354)" (#127889) This reverts commit 365451e28368db46ae89a3a990d85c10c2284aa2. --- Lib/_colorize.py | 17 ++++----- Lib/test/test__colorize.py | 37 ------------------- ...-11-28-15-55-48.gh-issue-127353.i-XOXg.rst | 2 - 3 files changed, 8 insertions(+), 48 deletions(-) delete mode 100644 Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 709081e25ec59b..845fb57a90abb8 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -32,6 +32,14 @@ def get_colors(colorize: bool = False) -> ANSIColors: def can_colorize() -> bool: + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False if not sys.flags.ignore_environment: if os.environ.get("PYTHON_COLORS") == "0": return False @@ -50,15 +58,6 @@ def can_colorize() -> bool: if not hasattr(sys.stderr, "fileno"): return False - if sys.platform == "win32": - try: - import nt - - if not nt._supports_virtual_terminal(): - return False - except (ImportError, AttributeError): - return False - try: return os.isatty(sys.stderr.fileno()) except io.UnsupportedOperation: diff --git a/Lib/test/test__colorize.py b/Lib/test/test__colorize.py index 7a65d63f49eed7..d55b97ade68cef 100644 --- a/Lib/test/test__colorize.py +++ b/Lib/test/test__colorize.py @@ -50,47 +50,10 @@ def test_colorized_detection_checks_for_environment_variables(self): with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", {}): - self.assertEqual(_colorize.can_colorize(), True) - isatty_mock.return_value = False with unittest.mock.patch("os.environ", {}): self.assertEqual(_colorize.can_colorize(), False) - @force_not_colorized - @unittest.skipUnless(sys.platform == "win32", "Windows only") - def test_colorized_detection_checks_for_environment_variables_no_vt(self): - with (unittest.mock.patch("nt._supports_virtual_terminal", return_value=False), - unittest.mock.patch("os.isatty") as isatty_mock, - unittest.mock.patch("sys.flags", unittest.mock.MagicMock(ignore_environment=False)), - unittest.mock.patch("_colorize.can_colorize", ORIGINAL_CAN_COLORIZE)): - isatty_mock.return_value = True - with unittest.mock.patch("os.environ", {'TERM': 'dumb'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '1'}): - self.assertEqual(_colorize.can_colorize(), True) - with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '0'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", {'NO_COLOR': '1'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", - {'NO_COLOR': '1', "PYTHON_COLORS": '1'}): - self.assertEqual(_colorize.can_colorize(), True) - with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1'}): - self.assertEqual(_colorize.can_colorize(), True) - with unittest.mock.patch("os.environ", - {'FORCE_COLOR': '1', 'NO_COLOR': '1'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", - {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", {}): - self.assertEqual(_colorize.can_colorize(), False) - - isatty_mock.return_value = False - with unittest.mock.patch("os.environ", {}): - self.assertEqual(_colorize.can_colorize(), False) - if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst b/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst deleted file mode 100644 index 88661b9a611071..00000000000000 --- a/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst +++ /dev/null @@ -1,2 +0,0 @@ -Allow to force color output on Windows using environment variables. Patch by -Andrey Efremov. From 8ac307f0d6834148471d2e12a45bf022e659164c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns=20=F0=9F=87=B5=F0=9F=87=B8?= Date: Thu, 12 Dec 2024 21:41:46 +0000 Subject: [PATCH 07/19] GH-127724: don't use sysconfig to calculate the venv local include path (#127731) --- Lib/venv/__init__.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index ca1af84e6705fe..dc4c9ef3531991 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -103,8 +103,6 @@ def _venv_path(self, env_dir, name): vars = { 'base': env_dir, 'platbase': env_dir, - 'installed_base': env_dir, - 'installed_platbase': env_dir, } return sysconfig.get_path(name, scheme='venv', vars=vars) @@ -175,9 +173,20 @@ def create_if_needed(d): context.python_dir = dirname context.python_exe = exename binpath = self._venv_path(env_dir, 'scripts') - incpath = self._venv_path(env_dir, 'include') libpath = self._venv_path(env_dir, 'purelib') + # PEP 405 says venvs should create a local include directory. + # See https://peps.python.org/pep-0405/#include-files + # XXX: This directory is not exposed in sysconfig or anywhere else, and + # doesn't seem to be utilized by modern packaging tools. We keep it + # for backwards-compatibility, and to follow the PEP, but I would + # recommend against using it, as most tooling does not pass it to + # compilers. Instead, until we standardize a site-specific include + # directory, I would recommend installing headers as package data, + # and providing some sort of API to get the include directories. + # Example: https://numpy.org/doc/2.1/reference/generated/numpy.get_include.html + incpath = os.path.join(env_dir, 'Include' if os.name == 'nt' else 'include') + context.inc_path = incpath create_if_needed(incpath) context.lib_path = libpath From 0cbc19d59e409854f2b9bdda75e1af2b6cd89ac2 Mon Sep 17 00:00:00 2001 From: Daniel Haag <121057143+denialhaag@users.noreply.github.com> Date: Thu, 12 Dec 2024 22:43:44 +0100 Subject: [PATCH 08/19] Fix typo in traceback docs (#127884) --- Doc/library/traceback.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 4899ed64ebad8d..b0ee3fc56ad735 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -274,7 +274,7 @@ Module-Level Functions :class:`!TracebackException` objects are created from actual exceptions to capture data for later printing. They offer a more lightweight method of storing this information by avoiding holding references to -:ref:`traceback` and :ref:`frame` objects +:ref:`traceback` and :ref:`frame` objects. In addition, they expose more options to configure the output compared to the module-level functions described above. From ba2d2fda93a03a91ac6cdff319fd23ef51848d51 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 13 Dec 2024 05:49:02 +0800 Subject: [PATCH 09/19] gh-127845: Minor improvements to iOS test runner script (#127846) Uses symlinks to install iOS framework into testbed clone, adds a verbose mode to the iOS runner to hide most Xcode output, adds another mechanism to disable terminal colors, and ensures that stdout is flushed after every write. --- Makefile.pre.in | 2 +- iOS/testbed/__main__.py | 66 ++++++++++++++----- iOS/testbed/iOSTestbedTests/iOSTestbedTests.m | 5 +- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 7b66802147dc3a..3e880f7800fccf 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2169,7 +2169,7 @@ testios: $(PYTHON_FOR_BUILD) $(srcdir)/iOS/testbed clone --framework $(PYTHONFRAMEWORKPREFIX) "$(XCFOLDER)" # Run the testbed project - $(PYTHON_FOR_BUILD) "$(XCFOLDER)" run -- test -uall --single-process --rerun -W + $(PYTHON_FOR_BUILD) "$(XCFOLDER)" run --verbose -- test -uall --single-process --rerun -W # Like test, but using --slow-ci which enables all test resources and use # longer timeout. Run an optional pybuildbot.identify script to include diff --git a/iOS/testbed/__main__.py b/iOS/testbed/__main__.py index 22570ee0f3ed04..068272835a5b95 100644 --- a/iOS/testbed/__main__.py +++ b/iOS/testbed/__main__.py @@ -141,10 +141,12 @@ async def log_stream_task(initial_devices): else: suppress_dupes = False sys.stdout.write(line) + sys.stdout.flush() -async def xcode_test(location, simulator): +async def xcode_test(location, simulator, verbose): # Run the test suite on the named simulator + print("Starting xcodebuild...") args = [ "xcodebuild", "test", @@ -159,6 +161,9 @@ async def xcode_test(location, simulator): "-derivedDataPath", str(location / "DerivedData"), ] + if not verbose: + args += ["-quiet"] + async with async_process( *args, stdout=subprocess.PIPE, @@ -166,6 +171,7 @@ async def xcode_test(location, simulator): ) as process: while line := (await process.stdout.readline()).decode(*DECODE_ARGS): sys.stdout.write(line) + sys.stdout.flush() status = await asyncio.wait_for(process.wait(), timeout=1) exit(status) @@ -182,7 +188,9 @@ def clone_testbed( sys.exit(10) if framework is None: - if not (source / "Python.xcframework/ios-arm64_x86_64-simulator/bin").is_dir(): + if not ( + source / "Python.xcframework/ios-arm64_x86_64-simulator/bin" + ).is_dir(): print( f"The testbed being cloned ({source}) does not contain " f"a simulator framework. Re-run with --framework" @@ -202,33 +210,48 @@ def clone_testbed( ) sys.exit(13) - print("Cloning testbed project...") - shutil.copytree(source, target) + print("Cloning testbed project:") + print(f" Cloning {source}...", end="", flush=True) + shutil.copytree(source, target, symlinks=True) + print(" done") if framework is not None: if framework.suffix == ".xcframework": - print("Installing XCFramework...") - xc_framework_path = target / "Python.xcframework" - shutil.rmtree(xc_framework_path) - shutil.copytree(framework, xc_framework_path) + print(" Installing XCFramework...", end="", flush=True) + xc_framework_path = (target / "Python.xcframework").resolve() + if xc_framework_path.is_dir(): + shutil.rmtree(xc_framework_path) + else: + xc_framework_path.unlink() + xc_framework_path.symlink_to( + framework.relative_to(xc_framework_path.parent, walk_up=True) + ) + print(" done") else: - print("Installing simulator Framework...") + print(" Installing simulator framework...", end="", flush=True) sim_framework_path = ( target / "Python.xcframework" / "ios-arm64_x86_64-simulator" + ).resolve() + if sim_framework_path.is_dir(): + shutil.rmtree(sim_framework_path) + else: + sim_framework_path.unlink() + sim_framework_path.symlink_to( + framework.relative_to(sim_framework_path.parent, walk_up=True) ) - shutil.rmtree(sim_framework_path) - shutil.copytree(framework, sim_framework_path) + print(" done") else: - print("Using pre-existing iOS framework.") + print(" Using pre-existing iOS framework.") for app_src in apps: - print(f"Installing app {app_src.name!r}...") + print(f" Installing app {app_src.name!r}...", end="", flush=True) app_target = target / f"iOSTestbed/app/{app_src.name}" if app_target.is_dir(): shutil.rmtree(app_target) shutil.copytree(app_src, app_target) + print(" done") - print(f"Testbed project created in {target}") + print(f"Successfully cloned testbed: {target.resolve()}") def update_plist(testbed_path, args): @@ -243,10 +266,11 @@ def update_plist(testbed_path, args): plistlib.dump(info, f) -async def run_testbed(simulator: str, args: list[str]): +async def run_testbed(simulator: str, args: list[str], verbose: bool=False): location = Path(__file__).parent - print("Updating plist...") + print("Updating plist...", end="", flush=True) update_plist(location, args) + print(" done.") # Get the list of devices that are booted at the start of the test run. # The simulator started by the test suite will be detected as the new @@ -256,7 +280,7 @@ async def run_testbed(simulator: str, args: list[str]): try: async with asyncio.TaskGroup() as tg: tg.create_task(log_stream_task(initial_devices)) - tg.create_task(xcode_test(location, simulator)) + tg.create_task(xcode_test(location, simulator=simulator, verbose=verbose)) except* MySystemExit as e: raise SystemExit(*e.exceptions[0].args) from None except* subprocess.CalledProcessError as e: @@ -315,6 +339,11 @@ def main(): default="iPhone SE (3rd Generation)", help="The name of the simulator to use (default: 'iPhone SE (3rd Generation)')", ) + run.add_argument( + "-v", "--verbose", + action="store_true", + help="Enable verbose output", + ) try: pos = sys.argv.index("--") @@ -330,7 +359,7 @@ def main(): clone_testbed( source=Path(__file__).parent, target=Path(context.location), - framework=Path(context.framework) if context.framework else None, + framework=Path(context.framework).resolve() if context.framework else None, apps=[Path(app) for app in context.apps], ) elif context.subcommand == "run": @@ -348,6 +377,7 @@ def main(): asyncio.run( run_testbed( simulator=context.simulator, + verbose=context.verbose, args=test_args, ) ) diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m index ac78456a61e65e..6db38253396c8d 100644 --- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m +++ b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m @@ -24,8 +24,11 @@ - (void)testPython { NSString *resourcePath = [[NSBundle mainBundle] resourcePath]; - // Disable all color, as the Xcode log can't display color + // Set some other common environment indicators to disable color, as the + // Xcode log can't display color. Stdout will report that it is *not* a + // TTY. setenv("NO_COLOR", "1", true); + setenv("PY_COLORS", "0", true); // Arguments to pass into the test suite runner. // argv[0] must identify the process; any subsequent arg From 58942a07df8811afba9c58dc16c1aab244ccf27a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= Date: Fri, 13 Dec 2024 10:26:22 +0100 Subject: [PATCH 10/19] Document PyObject_SelfIter (#127861) --- Doc/c-api/object.rst | 6 ++++++ Doc/data/refcounts.dat | 3 +++ 2 files changed, 9 insertions(+) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 1ae3c46bea46ea..f97ade01e67850 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -509,6 +509,12 @@ Object Protocol iterated. +.. c:function:: PyObject* PyObject_SelfIter(PyObject *obj) + + This is equivalent to the Python ``__iter__(self): return self`` method. + It is intended for :term:`iterator` types, to be used in the :c:member:`PyTypeObject.tp_iter` slot. + + .. c:function:: PyObject* PyObject_GetAIter(PyObject *o) This is the equivalent to the Python expression ``aiter(o)``. Takes an diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 3f49c88c3cc028..a043af48ba7a05 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1849,6 +1849,9 @@ PyObject_RichCompareBool:PyObject*:o1:0: PyObject_RichCompareBool:PyObject*:o2:0: PyObject_RichCompareBool:int:opid:: +PyObject_SelfIter:PyObject*::+1: +PyObject_SelfIter:PyObject*:obj:0: + PyObject_SetAttr:int::: PyObject_SetAttr:PyObject*:o:0: PyObject_SetAttr:PyObject*:attr_name:0: From 11ff3286b7e821bf439bc7caa0fa712e3bc3846a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20K=C3=A1lm=C3=A1n?= Date: Fri, 13 Dec 2024 10:27:02 +0100 Subject: [PATCH 11/19] link to the correct output method in documentation (#127857) --- Doc/library/http.cookies.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/http.cookies.rst b/Doc/library/http.cookies.rst index 4ce2e3c4f4cb42..ad37a0fca4742d 100644 --- a/Doc/library/http.cookies.rst +++ b/Doc/library/http.cookies.rst @@ -98,7 +98,7 @@ Cookie Objects .. method:: BaseCookie.output(attrs=None, header='Set-Cookie:', sep='\r\n') Return a string representation suitable to be sent as HTTP headers. *attrs* and - *header* are sent to each :class:`Morsel`'s :meth:`output` method. *sep* is used + *header* are sent to each :class:`Morsel`'s :meth:`~Morsel.output` method. *sep* is used to join the headers together, and is by default the combination ``'\r\n'`` (CRLF). From 9b4bbf4401291636e5db90511a0548fffb23a505 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 13 Dec 2024 09:54:59 +0000 Subject: [PATCH 12/19] GH-125174: Don't use `UINT32_MAX` in header file (GH-127863) --- Include/refcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/refcount.h b/Include/refcount.h index 6908c426141378..d98b2dfcf37202 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -377,7 +377,7 @@ static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) #if SIZEOF_VOID_P > 4 /* If an object has been freed, it will have a negative full refcnt * If it has not it been freed, will have a very large refcnt */ - if (op->ob_refcnt_full <= 0 || op->ob_refcnt > (UINT32_MAX - (1<<20))) { + if (op->ob_refcnt_full <= 0 || op->ob_refcnt > (((PY_UINT32_T)-1) - (1<<20))) { #else if (op->ob_refcnt <= 0) { #endif From 5fc6bb2754a25157575efc0b37da78c629fea46e Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 13 Dec 2024 11:06:26 +0100 Subject: [PATCH 13/19] gh-126868: Add freelist for compact int objects (GH-126865) --- Include/internal/pycore_freelist_state.h | 2 + Include/internal/pycore_long.h | 2 + ...-11-16-22-37-46.gh-issue-126868.yOoHSY.rst | 1 + Objects/longobject.c | 78 ++++++++++++++----- Objects/object.c | 1 + Python/bytecodes.c | 25 +++--- Python/executor_cases.c.h | 24 +++--- Python/generated_cases.c.h | 24 +++--- 8 files changed, 102 insertions(+), 55 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 4e04cf431e0b31..a1a94c1f2dc880 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -14,6 +14,7 @@ extern "C" { # define Py_dicts_MAXFREELIST 80 # define Py_dictkeys_MAXFREELIST 80 # define Py_floats_MAXFREELIST 100 +# define Py_ints_MAXFREELIST 100 # define Py_slices_MAXFREELIST 1 # define Py_contexts_MAXFREELIST 255 # define Py_async_gens_MAXFREELIST 80 @@ -35,6 +36,7 @@ struct _Py_freelist { struct _Py_freelists { struct _Py_freelist floats; + struct _Py_freelist ints; struct _Py_freelist tuples[PyTuple_MAXSAVESIZE]; struct _Py_freelist lists; struct _Py_freelist dicts; diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 196b4152280a35..8bead00e70640c 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -55,6 +55,8 @@ extern void _PyLong_FiniTypes(PyInterpreterState *interp); /* other API */ +PyAPI_FUNC(void) _PyLong_ExactDealloc(PyObject *self); + #define _PyLong_SMALL_INTS _Py_SINGLETON(small_ints) // _PyLong_GetZero() and _PyLong_GetOne() must always be available diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst new file mode 100644 index 00000000000000..fd1570908c1fd6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst @@ -0,0 +1 @@ +Increase performance of :class:`int` by adding a freelist for compact ints. diff --git a/Objects/longobject.c b/Objects/longobject.c index 4aa35685b509f2..96d59f542a7c3c 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6,6 +6,7 @@ #include "pycore_bitutils.h" // _Py_popcount32() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_call.h" // _PyObject_MakeTpCall +#include "pycore_freelist.h" // _Py_FREELIST_FREE, _Py_FREELIST_POP #include "pycore_long.h" // _Py_SmallInts #include "pycore_object.h" // _PyObject_Init() #include "pycore_runtime.h" // _PY_NSMALLPOSINTS @@ -42,7 +43,7 @@ static inline void _Py_DECREF_INT(PyLongObject *op) { assert(PyLong_CheckExact(op)); - _Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED((PyObject *)op, _PyLong_ExactDealloc); } static inline int @@ -220,15 +221,18 @@ _PyLong_FromMedium(sdigit x) { assert(!IS_SMALL_INT(x)); assert(is_medium_int(x)); - /* We could use a freelist here */ - PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject)); + + PyLongObject *v = (PyLongObject *)_Py_FREELIST_POP(PyLongObject, ints); if (v == NULL) { - PyErr_NoMemory(); - return NULL; + v = PyObject_Malloc(sizeof(PyLongObject)); + if (v == NULL) { + PyErr_NoMemory(); + return NULL; + } + _PyObject_Init((PyObject*)v, &PyLong_Type); } digit abs_x = x < 0 ? -x : x; _PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1); - _PyObject_Init((PyObject*)v, &PyLong_Type); v->long_value.ob_digit[0] = abs_x; return (PyObject*)v; } @@ -3611,24 +3615,60 @@ long_richcompare(PyObject *self, PyObject *other, int op) Py_RETURN_RICHCOMPARE(result, 0, op); } +static inline int +compact_int_is_small(PyObject *self) +{ + PyLongObject *pylong = (PyLongObject *)self; + assert(_PyLong_IsCompact(pylong)); + stwodigits ival = medium_value(pylong); + if (IS_SMALL_INT(ival)) { + PyLongObject *small_pylong = (PyLongObject *)get_small_int((sdigit)ival); + if (pylong == small_pylong) { + return 1; + } + } + return 0; +} + +void +_PyLong_ExactDealloc(PyObject *self) +{ + assert(PyLong_CheckExact(self)); + if (_PyLong_IsCompact((PyLongObject *)self)) { + #ifndef Py_GIL_DISABLED + if (compact_int_is_small(self)) { + // See PEP 683, section Accidental De-Immortalizing for details + _Py_SetImmortal(self); + return; + } + #endif + _Py_FREELIST_FREE(ints, self, PyObject_Free); + return; + } + PyObject_Free(self); +} + static void long_dealloc(PyObject *self) { - /* This should never get called, but we also don't want to SEGV if - * we accidentally decref small Ints out of existence. Instead, - * since small Ints are immortal, re-set the reference count. - */ - PyLongObject *pylong = (PyLongObject*)self; - if (pylong && _PyLong_IsCompact(pylong)) { - stwodigits ival = medium_value(pylong); - if (IS_SMALL_INT(ival)) { - PyLongObject *small_pylong = (PyLongObject *)get_small_int((sdigit)ival); - if (pylong == small_pylong) { - _Py_SetImmortal(self); - return; - } + assert(self); + if (_PyLong_IsCompact((PyLongObject *)self)) { + if (compact_int_is_small(self)) { + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref small Ints out of existence. Instead, + * since small Ints are immortal, re-set the reference count. + * + * See PEP 683, section Accidental De-Immortalizing for details + */ + _Py_SetImmortal(self); + return; + } + if (PyLong_CheckExact(self)) { + _Py_FREELIST_FREE(ints, self, PyObject_Free); + return; } } + Py_TYPE(self)->tp_free(self); } diff --git a/Objects/object.c b/Objects/object.c index c64675b5e1d6c2..d584414c559b9d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -936,6 +936,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); + clear_freelist(&freelists->ints, is_finalization, free_object); } /* diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d0e4c2bc45489b..f0eb5405faeff5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -26,6 +26,7 @@ #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_range.h" // _PyRangeIterObject +#include "pycore_long.h" // _PyLong_ExactDealloc() #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_sliceobject.h" // _PyBuildSlice_ConsumeRefs #include "pycore_tuple.h" // _PyTuple_ITEMS() @@ -514,8 +515,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -527,8 +528,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -540,8 +541,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -801,7 +802,7 @@ dummy_func( assert(res_o != NULL); Py_INCREF(res_o); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -821,7 +822,7 @@ dummy_func( DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -842,7 +843,7 @@ dummy_func( PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -959,7 +960,7 @@ dummy_func( assert(old_value != NULL); UNLOCK_OBJECT(list); // unlock before decrefs! Py_DECREF(old_value); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(list_st); } @@ -2476,9 +2477,9 @@ dummy_func( Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); DEAD(left); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); DEAD(right); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 18f19773d25c90..19ba67a8af6769 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -626,8 +626,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -646,8 +646,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -666,8 +666,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1000,7 +1000,7 @@ assert(res_o != NULL); Py_INCREF(res_o); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1042,7 +1042,7 @@ } STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1081,7 +1081,7 @@ PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1264,7 +1264,7 @@ assert(old_value != NULL); UNLOCK_OBJECT(list); // unlock before decrefs! Py_DECREF(old_value); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); @@ -3075,8 +3075,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. stack_pointer[-2] = res; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fc0f55555f5c36..51227c9868b8cc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -118,8 +118,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -285,8 +285,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -356,8 +356,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -590,7 +590,7 @@ assert(res_o != NULL); Py_INCREF(res_o); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -622,7 +622,7 @@ DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c, BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -654,7 +654,7 @@ PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -3333,8 +3333,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. } @@ -7721,7 +7721,7 @@ assert(old_value != NULL); UNLOCK_OBJECT(list); // unlock before decrefs! Py_DECREF(old_value); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); From e62e1ca4553dbcf9d7f89be24bebcbd9213f9ae5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 13 Dec 2024 11:00:00 +0000 Subject: [PATCH 14/19] GH-126833: Dumps graphviz representation of executor graph. (GH-126880) --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_optimizer.h | 5 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 + Python/ceval.c | 1 + Python/clinic/sysmodule.c.h | 58 +++++++- Python/optimizer.c | 136 +++++++++++++++++- Python/sysmodule.c | 25 ++++ 9 files changed, 230 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index c12e242d560bde..90214a314031d1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1129,6 +1129,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(origin)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(out_fd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(outgoing)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(outpath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index dfd9f2b799ec8e..97a75d0c46c867 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -618,6 +618,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(origin) STRUCT_FOR_ID(out_fd) STRUCT_FOR_ID(outgoing) + STRUCT_FOR_ID(outpath) STRUCT_FOR_ID(overlapped) STRUCT_FOR_ID(owner) STRUCT_FOR_ID(pages) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 6d70b42f708854..bc7cfcde613d65 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -60,6 +60,9 @@ typedef struct { }; uint64_t operand0; // A cache entry uint64_t operand1; +#ifdef Py_STATS + uint64_t execution_count; +#endif } _PyUOpInstruction; typedef struct { @@ -285,6 +288,8 @@ static inline int is_terminator(const _PyUOpInstruction *uop) ); } +PyAPI_FUNC(int) _PyDumpExecutors(FILE *out); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index b631382cae058a..4f928cc050bf8e 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1127,6 +1127,7 @@ extern "C" { INIT_ID(origin), \ INIT_ID(out_fd), \ INIT_ID(outgoing), \ + INIT_ID(outpath), \ INIT_ID(overlapped), \ INIT_ID(owner), \ INIT_ID(pages), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 24cec3a4fded7a..5b78d038fc1192 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2268,6 +2268,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(outpath); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(overlapped); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Python/ceval.c b/Python/ceval.c index 5eda033eced628..fd891d7839151e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1095,6 +1095,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int UOP_PAIR_INC(uopcode, lastuop); #ifdef Py_STATS trace_uop_execution_counter++; + ((_PyUOpInstruction *)next_uop)[-1].execution_count++; #endif switch (uopcode) { diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 86c42ceffc5e31..cfcbd55388efa0 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1481,6 +1481,62 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) return sys_is_stack_trampoline_active_impl(module); } +PyDoc_STRVAR(sys__dump_tracelets__doc__, +"_dump_tracelets($module, /, outpath)\n" +"--\n" +"\n" +"Dump the graph of tracelets in graphviz format"); + +#define SYS__DUMP_TRACELETS_METHODDEF \ + {"_dump_tracelets", _PyCFunction_CAST(sys__dump_tracelets), METH_FASTCALL|METH_KEYWORDS, sys__dump_tracelets__doc__}, + +static PyObject * +sys__dump_tracelets_impl(PyObject *module, PyObject *outpath); + +static PyObject * +sys__dump_tracelets(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(outpath), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"outpath", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_dump_tracelets", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *outpath; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + outpath = args[0]; + return_value = sys__dump_tracelets_impl(module, outpath); + +exit: + return return_value; +} + PyDoc_STRVAR(sys__getframemodulename__doc__, "_getframemodulename($module, /, depth=0)\n" "--\n" @@ -1668,4 +1724,4 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=6d4f6cd20419b675 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=568b0a0069dc43e8 input=a9049054013a1b77]*/ diff --git a/Python/optimizer.c b/Python/optimizer.c index 6a232218981dcd..6a4d20fad76c15 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1,6 +1,7 @@ +#include "Python.h" + #ifdef _Py_TIER2 -#include "Python.h" #include "opcode.h" #include "pycore_interp.h" #include "pycore_backoff.h" @@ -474,6 +475,9 @@ add_to_trace( trace[trace_length].target = target; trace[trace_length].oparg = oparg; trace[trace_length].operand0 = operand; +#ifdef Py_STATS + trace[trace_length].execution_count = 0; +#endif return trace_length + 1; } @@ -983,6 +987,9 @@ static void make_exit(_PyUOpInstruction *inst, int opcode, int target) inst->operand0 = 0; inst->format = UOP_FORMAT_TARGET; inst->target = target; +#ifdef Py_STATS + inst->execution_count = 0; +#endif } /* Convert implicit exits, errors and deopts @@ -1709,4 +1716,131 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) _Py_Executors_InvalidateAll(interp, 0); } +static void +write_str(PyObject *str, FILE *out) +{ + // Encode the Unicode object to the specified encoding + PyObject *encoded_obj = PyUnicode_AsEncodedString(str, "utf8", "strict"); + if (encoded_obj == NULL) { + PyErr_Clear(); + return; + } + const char *encoded_str = PyBytes_AsString(encoded_obj); + Py_ssize_t encoded_size = PyBytes_Size(encoded_obj); + fwrite(encoded_str, 1, encoded_size, out); + Py_DECREF(encoded_obj); +} + +static int +find_line_number(PyCodeObject *code, _PyExecutorObject *executor) +{ + int code_len = (int)Py_SIZE(code); + for (int i = 0; i < code_len; i++) { + _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; + int opcode = instr->op.code; + if (opcode == ENTER_EXECUTOR) { + _PyExecutorObject *exec = code->co_executors->executors[instr->op.arg]; + if (exec == executor) { + return PyCode_Addr2Line(code, i*2); + } + } + i += _PyOpcode_Caches[_Py_GetBaseCodeUnit(code, i).op.code]; + } + return -1; +} + +/* Writes the node and outgoing edges for a single tracelet in graphviz format. + * Each tracelet is presented as a table of the uops it contains. + * If Py_STATS is enabled, execution counts are included. + * + * https://graphviz.readthedocs.io/en/stable/manual.html + * https://graphviz.org/gallery/ + */ +static void +executor_to_gv(_PyExecutorObject *executor, FILE *out) +{ + PyCodeObject *code = executor->vm_data.code; + fprintf(out, "executor_%p [\n", executor); + fprintf(out, " shape = none\n"); + + /* Write the HTML table for the uops */ + fprintf(out, " label = <\n"); + fprintf(out, " \n"); + if (code == NULL) { + fprintf(out, " \n"); + } + else { + fprintf(out, " \n", line); + } + for (uint32_t i = 0; i < executor->code_size; i++) { + /* Write row for uop. + * The `port` is a marker so that outgoing edges can + * be placed correctly. If a row is marked `port=17`, + * then the outgoing edge is `{EXEC_NAME}:17 -> {TARGET}` + * https://graphviz.readthedocs.io/en/stable/manual.html#node-ports-compass + */ + _PyUOpInstruction const *inst = &executor->trace[i]; + const char *opname = _PyOpcode_uop_name[inst->opcode]; +#ifdef Py_STATS + fprintf(out, " \n", i, opname, inst->execution_count); +#else + fprintf(out, " \n", i, opname); +#endif + if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) { + break; + } + } + fprintf(out, "
Executor
No code object
"); + write_str(code->co_qualname, out); + int line = find_line_number(code, executor); + fprintf(out, ": %d
%s -- %" PRIu64 "
%s
>\n"); + fprintf(out, "]\n\n"); + + /* Write all the outgoing edges */ + for (uint32_t i = 0; i < executor->code_size; i++) { + _PyUOpInstruction const *inst = &executor->trace[i]; + uint16_t flags = _PyUop_Flags[inst->opcode]; + _PyExitData *exit = NULL; + if (inst->opcode == _EXIT_TRACE) { + exit = (_PyExitData *)inst->operand0; + } + else if (flags & HAS_EXIT_FLAG) { + assert(inst->format == UOP_FORMAT_JUMP); + _PyUOpInstruction const *exit_inst = &executor->trace[inst->jump_target]; + assert(exit_inst->opcode == _EXIT_TRACE); + exit = (_PyExitData *)exit_inst->operand0; + } + if (exit != NULL && exit->executor != NULL) { + fprintf(out, "executor_%p:i%d -> executor_%p:start\n", executor, i, exit->executor); + } + if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) { + break; + } + } +} + +/* Write the graph of all the live tracelets in graphviz format. */ +int +_PyDumpExecutors(FILE *out) +{ + fprintf(out, "digraph ideal {\n\n"); + fprintf(out, " rankdir = \"LR\"\n\n"); + PyInterpreterState *interp = PyInterpreterState_Get(); + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + executor_to_gv(exec, out); + exec = exec->vm_data.links.next; + } + fprintf(out, "}\n\n"); + return 0; +} + +#else + +int +_PyDumpExecutors(FILE *out) +{ + PyErr_SetString(PyExc_NotImplementedError, "No JIT available"); + return -1; +} + #endif /* _Py_TIER2 */ diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 6df297f364c5d3..d6719f9bb0af91 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2344,6 +2344,30 @@ sys_is_stack_trampoline_active_impl(PyObject *module) Py_RETURN_FALSE; } +/*[clinic input] +sys._dump_tracelets + + outpath: object + +Dump the graph of tracelets in graphviz format +[clinic start generated code]*/ + +static PyObject * +sys__dump_tracelets_impl(PyObject *module, PyObject *outpath) +/*[clinic end generated code: output=a7fe265e2bc3b674 input=5bff6880cd28ffd1]*/ +{ + FILE *out = _Py_fopen_obj(outpath, "wb"); + if (out == NULL) { + return NULL; + } + int err = _PyDumpExecutors(out); + fclose(out); + if (err) { + return NULL; + } + Py_RETURN_NONE; +} + /*[clinic input] sys._getframemodulename @@ -2603,6 +2627,7 @@ static PyMethodDef sys_methods[] = { #endif SYS__GET_CPU_COUNT_CONFIG_METHODDEF SYS__IS_GIL_ENABLED_METHODDEF + SYS__DUMP_TRACELETS_METHODDEF {NULL, NULL} // sentinel }; From 6ff38fc4e2af8e795dc791be6ea596d2146d4119 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Dec 2024 13:53:47 +0100 Subject: [PATCH 15/19] gh-127870: Detect recursive calls in ctypes _as_parameter_ handling (#127872) --- Lib/test/test_ctypes/test_as_parameter.py | 12 ++++++++-- ...-12-12-16-59-42.gh-issue-127870._NFG-3.rst | 2 ++ Modules/_ctypes/_ctypes.c | 22 ++++++++++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst diff --git a/Lib/test/test_ctypes/test_as_parameter.py b/Lib/test/test_ctypes/test_as_parameter.py index cc62b1a22a3b06..c5e1840b0eb7af 100644 --- a/Lib/test/test_ctypes/test_as_parameter.py +++ b/Lib/test/test_ctypes/test_as_parameter.py @@ -198,8 +198,16 @@ class A: a = A() a._as_parameter_ = a - with self.assertRaises(RecursionError): - c_int.from_param(a) + for c_type in ( + ctypes.c_wchar_p, + ctypes.c_char_p, + ctypes.c_void_p, + ctypes.c_int, # PyCSimpleType + POINT, # CDataType + ): + with self.subTest(c_type=c_type): + with self.assertRaises(RecursionError): + c_type.from_param(a) class AsParamWrapper: diff --git a/Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst b/Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst new file mode 100644 index 00000000000000..99b2df00032082 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst @@ -0,0 +1,2 @@ +Detect recursive calls in ctypes ``_as_parameter_`` handling. +Patch by Victor Stinner. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 34529bce496d88..bb4699884057ba 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -1052,8 +1052,13 @@ CDataType_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = CDataType_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -1843,8 +1848,13 @@ c_wchar_p_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = c_wchar_p_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -1927,8 +1937,13 @@ c_char_p_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = c_char_p_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -2079,8 +2094,13 @@ c_void_p_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = c_void_p_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -2447,9 +2467,9 @@ PyCSimpleType_from_param_impl(PyObject *type, PyTypeObject *cls, return NULL; } value = PyCSimpleType_from_param_impl(type, cls, as_parameter); - _Py_LeaveRecursiveCall(); Py_DECREF(as_parameter); Py_XDECREF(exc); + _Py_LeaveRecursiveCall(); return value; } if (exc) { From d05a4e6a0d366b854a3103cae0c941811fd48c4c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Dec 2024 14:23:20 +0100 Subject: [PATCH 16/19] gh-127906: Test the limited C API in test_cppext (#127916) --- Lib/test/test_cppext/__init__.py | 13 ++++++++++--- Lib/test/test_cppext/extension.cpp | 9 +++++++++ Lib/test/test_cppext/setup.py | 6 ++++++ .../2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst | 1 + 4 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst diff --git a/Lib/test/test_cppext/__init__.py b/Lib/test/test_cppext/__init__.py index efd79448c66104..d5195227308fec 100644 --- a/Lib/test/test_cppext/__init__.py +++ b/Lib/test/test_cppext/__init__.py @@ -41,12 +41,17 @@ def test_build_cpp11(self): def test_build_cpp14(self): self.check_build('_testcpp14ext', std='c++14') - def check_build(self, extension_name, std=None): + @support.requires_gil_enabled('incompatible with Free Threading') + def test_build_limited(self): + self.check_build('_testcppext_limited', limited=True) + + def check_build(self, extension_name, std=None, limited=False): venv_dir = 'env' with support.setup_venv_with_pip_setuptools_wheel(venv_dir) as python_exe: - self._check_build(extension_name, python_exe, std=std) + self._check_build(extension_name, python_exe, + std=std, limited=limited) - def _check_build(self, extension_name, python_exe, std): + def _check_build(self, extension_name, python_exe, std, limited): pkg_dir = 'pkg' os.mkdir(pkg_dir) shutil.copy(SETUP, os.path.join(pkg_dir, os.path.basename(SETUP))) @@ -56,6 +61,8 @@ def run_cmd(operation, cmd): env = os.environ.copy() if std: env['CPYTHON_TEST_CPP_STD'] = std + if limited: + env['CPYTHON_TEST_LIMITED'] = '1' env['CPYTHON_TEST_EXT_NAME'] = extension_name if support.verbose: print('Run:', ' '.join(map(shlex.quote, cmd))) diff --git a/Lib/test/test_cppext/extension.cpp b/Lib/test/test_cppext/extension.cpp index ab485b629b7788..500d5918145c00 100644 --- a/Lib/test/test_cppext/extension.cpp +++ b/Lib/test/test_cppext/extension.cpp @@ -62,6 +62,7 @@ test_api_casts(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) Py_ssize_t refcnt = Py_REFCNT(obj); assert(refcnt >= 1); +#ifndef Py_LIMITED_API // gh-92138: For backward compatibility, functions of Python C API accepts // "const PyObject*". Check that using it does not emit C++ compiler // warnings. @@ -74,6 +75,7 @@ test_api_casts(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) assert(PyTuple_GET_SIZE(const_obj) == 2); PyObject *one = PyTuple_GET_ITEM(const_obj, 0); assert(PyLong_AsLong(one) == 1); +#endif // gh-92898: StrongRef doesn't inherit from PyObject but has an operator to // cast to PyObject*. @@ -106,6 +108,12 @@ test_unicode(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) } assert(PyUnicode_Check(str)); + + assert(PyUnicode_GetLength(str) == 3); + assert(PyUnicode_ReadChar(str, 0) == 'a'); + assert(PyUnicode_ReadChar(str, 1) == 'b'); + +#ifndef Py_LIMITED_API assert(PyUnicode_GET_LENGTH(str) == 3); // gh-92800: test PyUnicode_READ() @@ -121,6 +129,7 @@ test_unicode(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) assert(PyUnicode_READ(ukind, const_data, 2) == 'c'); assert(PyUnicode_READ_CHAR(str, 1) == 'b'); +#endif Py_DECREF(str); Py_RETURN_NONE; diff --git a/Lib/test/test_cppext/setup.py b/Lib/test/test_cppext/setup.py index d97b238b8d1477..019ff18446a2eb 100644 --- a/Lib/test/test_cppext/setup.py +++ b/Lib/test/test_cppext/setup.py @@ -33,6 +33,7 @@ def main(): cppflags = list(CPPFLAGS) std = os.environ.get("CPYTHON_TEST_CPP_STD", "") module_name = os.environ["CPYTHON_TEST_EXT_NAME"] + limited = bool(os.environ.get("CPYTHON_TEST_LIMITED", "")) cppflags = list(CPPFLAGS) cppflags.append(f'-DMODULE_NAME={module_name}') @@ -59,6 +60,11 @@ def main(): # CC env var overrides sysconfig CC variable in setuptools os.environ['CC'] = cmd + # Define Py_LIMITED_API macro + if limited: + version = sys.hexversion + cppflags.append(f'-DPy_LIMITED_API={version:#x}') + # On Windows, add PCbuild\amd64\ to include and library directories include_dirs = [] library_dirs = [] diff --git a/Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst b/Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst new file mode 100644 index 00000000000000..6f577e741dff7f --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst @@ -0,0 +1 @@ +Test the limited C API in test_cppext. Patch by Victor Stinner. From 6446408d426814bf2bc9d3911a91741f04d4bc4e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Dec 2024 14:24:48 +0100 Subject: [PATCH 17/19] gh-102471, PEP 757: Add PyLong import and export API (#121339) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sergey B Kirpichev Co-authored-by: Steve Dower Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/c-api/long.rst | 174 ++++++++++++++++++ Doc/data/refcounts.dat | 7 + Doc/whatsnew/3.14.rst | 11 ++ Include/cpython/longintrepr.h | 38 ++++ Lib/test/test_capi/test_long.py | 91 +++++++++ ...-07-03-17-26-53.gh-issue-102471.XpmKYk.rst | 10 + Modules/_testcapi/long.c | 124 +++++++++++++ Objects/longobject.c | 120 ++++++++++++ Tools/c-analyzer/cpython/ignored.tsv | 1 + 9 files changed, 576 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index cb12d43d92026f..f48cd07a979f56 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -653,3 +653,177 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.12 + +Export API +^^^^^^^^^^ + +.. versionadded:: next + +.. c:struct:: PyLongLayout + + Layout of an array of "digits" ("limbs" in the GMP terminology), used to + represent absolute value for arbitrary precision integers. + + Use :c:func:`PyLong_GetNativeLayout` to get the native layout of Python + :class:`int` objects, used internally for integers with "big enough" + absolute value. + + See also :data:`sys.int_info` which exposes similar information in Python. + + .. c:member:: uint8_t bits_per_digit + + Bits per digit. For example, a 15 bit digit means that bits 0-14 contain + meaningful information. + + .. c:member:: uint8_t digit_size + + Digit size in bytes. For example, a 15 bit digit will require at least 2 + bytes. + + .. c:member:: int8_t digits_order + + Digits order: + + - ``1`` for most significant digit first + - ``-1`` for least significant digit first + + .. c:member:: int8_t digit_endianness + + Digit endianness: + + - ``1`` for most significant byte first (big endian) + - ``-1`` for least significant byte first (little endian) + + +.. c:function:: const PyLongLayout* PyLong_GetNativeLayout(void) + + Get the native layout of Python :class:`int` objects. + + See the :c:struct:`PyLongLayout` structure. + + The function must not be called before Python initialization nor after + Python finalization. The returned layout is valid until Python is + finalized. The layout is the same for all Python sub-interpreters + in a process, and so it can be cached. + + +.. c:struct:: PyLongExport + + Export of a Python :class:`int` object. + + There are two cases: + + * If :c:member:`digits` is ``NULL``, only use the :c:member:`value` member. + * If :c:member:`digits` is not ``NULL``, use :c:member:`negative`, + :c:member:`ndigits` and :c:member:`digits` members. + + .. c:member:: int64_t value + + The native integer value of the exported :class:`int` object. + Only valid if :c:member:`digits` is ``NULL``. + + .. c:member:: uint8_t negative + + ``1`` if the number is negative, ``0`` otherwise. + Only valid if :c:member:`digits` is not ``NULL``. + + .. c:member:: Py_ssize_t ndigits + + Number of digits in :c:member:`digits` array. + Only valid if :c:member:`digits` is not ``NULL``. + + .. c:member:: const void *digits + + Read-only array of unsigned digits. Can be ``NULL``. + + +.. c:function:: int PyLong_Export(PyObject *obj, PyLongExport *export_long) + + Export a Python :class:`int` object. + + *export_long* must point to a :c:struct:`PyLongExport` structure allocated + by the caller. It must not be ``NULL``. + + On success, fill in *\*export_long* and return ``0``. + On error, set an exception and return ``-1``. + + :c:func:`PyLong_FreeExport` must be called when the export is no longer + needed. + + .. impl-detail:: + This function always succeeds if *obj* is a Python :class:`int` object + or a subclass. + + +.. c:function:: void PyLong_FreeExport(PyLongExport *export_long) + + Release the export *export_long* created by :c:func:`PyLong_Export`. + + .. impl-detail:: + Calling :c:func:`PyLong_FreeExport` is optional if *export_long->digits* + is ``NULL``. + + +PyLongWriter API +^^^^^^^^^^^^^^^^ + +The :c:type:`PyLongWriter` API can be used to import an integer. + +.. versionadded:: next + +.. c:struct:: PyLongWriter + + A Python :class:`int` writer instance. + + The instance must be destroyed by :c:func:`PyLongWriter_Finish` or + :c:func:`PyLongWriter_Discard`. + + +.. c:function:: PyLongWriter* PyLongWriter_Create(int negative, Py_ssize_t ndigits, void **digits) + + Create a :c:type:`PyLongWriter`. + + On success, allocate *\*digits* and return a writer. + On error, set an exception and return ``NULL``. + + *negative* is ``1`` if the number is negative, or ``0`` otherwise. + + *ndigits* is the number of digits in the *digits* array. It must be + greater than 0. + + *digits* must not be NULL. + + After a successful call to this function, the caller should fill in the + array of digits *digits* and then call :c:func:`PyLongWriter_Finish` to get + a Python :class:`int`. + The layout of *digits* is described by :c:func:`PyLong_GetNativeLayout`. + + Digits must be in the range [``0``; ``(1 << bits_per_digit) - 1``] + (where the :c:struct:`~PyLongLayout.bits_per_digit` is the number of bits + per digit). + Any unused most significant digits must be set to ``0``. + + Alternately, call :c:func:`PyLongWriter_Discard` to destroy the writer + instance without creating an :class:`~int` object. + + +.. c:function:: PyObject* PyLongWriter_Finish(PyLongWriter *writer) + + Finish a :c:type:`PyLongWriter` created by :c:func:`PyLongWriter_Create`. + + On success, return a Python :class:`int` object. + On error, set an exception and return ``NULL``. + + The function takes care of normalizing the digits and converts the object + to a compact integer if needed. + + The writer instance and the *digits* array are invalid after the call. + + +.. c:function:: void PyLongWriter_Discard(PyLongWriter *writer) + + Discard a :c:type:`PyLongWriter` created by :c:func:`PyLongWriter_Create`. + + *writer* must not be ``NULL``. + + The writer instance and the *digits* array are invalid after the call. diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index a043af48ba7a05..e78754e24e23d8 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1299,6 +1299,13 @@ PyLong_GetSign:int::: PyLong_GetSign:PyObject*:v:0: PyLong_GetSign:int*:sign:: +PyLong_Export:int::: +PyLong_Export:PyObject*:obj:0: +PyLong_Export:PyLongExport*:export_long:: + +PyLongWriter_Finish:PyObject*::+1: +PyLongWriter_Finish:PyLongWriter*:writer:: + PyMapping_Check:int::: PyMapping_Check:PyObject*:o:0: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b71d31f9742fe0..5ce398ab93d6b4 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1018,6 +1018,17 @@ New features (Contributed by Victor Stinner in :gh:`107954`.) +* Add a new import and export API for Python :class:`int` objects (:pep:`757`): + + * :c:func:`PyLong_GetNativeLayout`; + * :c:func:`PyLong_Export`; + * :c:func:`PyLong_FreeExport`; + * :c:func:`PyLongWriter_Create`; + * :c:func:`PyLongWriter_Finish`; + * :c:func:`PyLongWriter_Discard`. + + (Contributed by Victor Stinner in :gh:`102471`.) + * Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot for easier superclass identification, which attempts to resolve the `type checking issue `__ mentioned in :pep:`630` diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index c60ccc463653f9..357477b60d9a5a 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -139,6 +139,44 @@ _PyLong_CompactValue(const PyLongObject *op) #define PyUnstable_Long_CompactValue _PyLong_CompactValue +/* --- Import/Export API -------------------------------------------------- */ + +typedef struct PyLongLayout { + uint8_t bits_per_digit; + uint8_t digit_size; + int8_t digits_order; + int8_t digit_endianness; +} PyLongLayout; + +PyAPI_FUNC(const PyLongLayout*) PyLong_GetNativeLayout(void); + +typedef struct PyLongExport { + int64_t value; + uint8_t negative; + Py_ssize_t ndigits; + const void *digits; + // Member used internally, must not be used for other purpose. + Py_uintptr_t _reserved; +} PyLongExport; + +PyAPI_FUNC(int) PyLong_Export( + PyObject *obj, + PyLongExport *export_long); +PyAPI_FUNC(void) PyLong_FreeExport( + PyLongExport *export_long); + + +/* --- PyLongWriter API --------------------------------------------------- */ + +typedef struct PyLongWriter PyLongWriter; + +PyAPI_FUNC(PyLongWriter*) PyLongWriter_Create( + int negative, + Py_ssize_t ndigits, + void **digits); +PyAPI_FUNC(PyObject*) PyLongWriter_Finish(PyLongWriter *writer); +PyAPI_FUNC(void) PyLongWriter_Discard(PyLongWriter *writer); + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index a77094588a0edf..d45ac75c822ea9 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -10,6 +10,7 @@ NULL = None + class IntSubclass(int): pass @@ -714,5 +715,95 @@ def test_long_asuint64(self): self.check_long_asint(as_uint64, 0, UINT64_MAX, negative_value_error=ValueError) + def test_long_layout(self): + # Test PyLong_GetNativeLayout() + int_info = sys.int_info + layout = _testcapi.get_pylong_layout() + expected = { + 'bits_per_digit': int_info.bits_per_digit, + 'digit_size': int_info.sizeof_digit, + 'digits_order': -1, + 'digit_endianness': -1 if sys.byteorder == 'little' else 1, + } + self.assertEqual(layout, expected) + + def test_long_export(self): + # Test PyLong_Export() + layout = _testcapi.get_pylong_layout() + base = 2 ** layout['bits_per_digit'] + + pylong_export = _testcapi.pylong_export + + # value fits into int64_t + self.assertEqual(pylong_export(0), 0) + self.assertEqual(pylong_export(123), 123) + self.assertEqual(pylong_export(-123), -123) + self.assertEqual(pylong_export(IntSubclass(123)), 123) + + # use an array, doesn't fit into int64_t + self.assertEqual(pylong_export(base**10 * 2 + 1), + (0, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])) + self.assertEqual(pylong_export(-(base**10 * 2 + 1)), + (1, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])) + self.assertEqual(pylong_export(IntSubclass(base**10 * 2 + 1)), + (0, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])) + + self.assertRaises(TypeError, pylong_export, 1.0) + self.assertRaises(TypeError, pylong_export, 0+1j) + self.assertRaises(TypeError, pylong_export, "abc") + + def test_longwriter_create(self): + # Test PyLongWriter_Create() + layout = _testcapi.get_pylong_layout() + base = 2 ** layout['bits_per_digit'] + + pylongwriter_create = _testcapi.pylongwriter_create + self.assertRaises(ValueError, pylongwriter_create, 0, []) + self.assertRaises(ValueError, pylongwriter_create, -123, []) + self.assertEqual(pylongwriter_create(0, [0]), 0) + self.assertEqual(pylongwriter_create(0, [123]), 123) + self.assertEqual(pylongwriter_create(1, [123]), -123) + self.assertEqual(pylongwriter_create(1, [1, 2]), + -(base * 2 + 1)) + self.assertEqual(pylongwriter_create(0, [1, 2, 3]), + base**2 * 3 + base * 2 + 1) + max_digit = base - 1 + self.assertEqual(pylongwriter_create(0, [max_digit, max_digit, max_digit]), + base**2 * max_digit + base * max_digit + max_digit) + + # normalize + self.assertEqual(pylongwriter_create(0, [123, 0, 0]), 123) + + # test singletons + normalize + for num in (-2, 0, 1, 5, 42, 100): + self.assertIs(pylongwriter_create(bool(num < 0), [abs(num), 0]), + num) + + def to_digits(num): + digits = [] + while True: + num, digit = divmod(num, base) + digits.append(digit) + if not num: + break + return digits + + # round trip: Python int -> export -> Python int + pylong_export = _testcapi.pylong_export + numbers = [*range(0, 10), 12345, 0xdeadbeef, 2**100, 2**100-1] + numbers.extend(-num for num in list(numbers)) + for num in numbers: + with self.subTest(num=num): + data = pylong_export(num) + if isinstance(data, tuple): + negative, digits = data + else: + value = data + negative = int(value < 0) + digits = to_digits(abs(value)) + self.assertEqual(pylongwriter_create(negative, digits), num, + (negative, digits)) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst b/Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst new file mode 100644 index 00000000000000..c18c159ac87d08 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst @@ -0,0 +1,10 @@ +Add a new import and export API for Python :class:`int` objects (:pep:`757`): + +* :c:func:`PyLong_GetNativeLayout`; +* :c:func:`PyLong_Export`; +* :c:func:`PyLong_FreeExport`; +* :c:func:`PyLongWriter_Create`; +* :c:func:`PyLongWriter_Finish`; +* :c:func:`PyLongWriter_Discard`. + +Patch by Victor Stinner. diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index ebea09080ef11c..42243023a45768 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -141,6 +141,127 @@ pylong_aspid(PyObject *module, PyObject *arg) } +static PyObject * +layout_to_dict(const PyLongLayout *layout) +{ + return Py_BuildValue("{sisisisi}", + "bits_per_digit", (int)layout->bits_per_digit, + "digit_size", (int)layout->digit_size, + "digits_order", (int)layout->digits_order, + "digit_endianness", (int)layout->digit_endianness); +} + + +static PyObject * +pylong_export(PyObject *module, PyObject *obj) +{ + PyLongExport export_long; + if (PyLong_Export(obj, &export_long) < 0) { + return NULL; + } + + if (export_long.digits == NULL) { + assert(export_long.negative == 0); + assert(export_long.ndigits == 0); + assert(export_long.digits == NULL); + PyObject *res = PyLong_FromInt64(export_long.value); + PyLong_FreeExport(&export_long); + return res; + } + + assert(PyLong_GetNativeLayout()->digit_size == sizeof(digit)); + const digit *export_long_digits = export_long.digits; + + PyObject *digits = PyList_New(0); + if (digits == NULL) { + goto error; + } + for (Py_ssize_t i = 0; i < export_long.ndigits; i++) { + PyObject *item = PyLong_FromUnsignedLong(export_long_digits[i]); + if (item == NULL) { + goto error; + } + + if (PyList_Append(digits, item) < 0) { + Py_DECREF(item); + goto error; + } + Py_DECREF(item); + } + + assert(export_long.value == 0); + PyObject *res = Py_BuildValue("(iN)", export_long.negative, digits); + + PyLong_FreeExport(&export_long); + assert(export_long._reserved == 0); + + return res; + +error: + Py_XDECREF(digits); + PyLong_FreeExport(&export_long); + return NULL; +} + + +static PyObject * +pylongwriter_create(PyObject *module, PyObject *args) +{ + int negative; + PyObject *list; + // TODO(vstinner): write test for negative ndigits and digits==NULL + if (!PyArg_ParseTuple(args, "iO!", &negative, &PyList_Type, &list)) { + return NULL; + } + Py_ssize_t ndigits = PyList_GET_SIZE(list); + + digit *digits = PyMem_Malloc((size_t)ndigits * sizeof(digit)); + if (digits == NULL) { + return PyErr_NoMemory(); + } + + for (Py_ssize_t i = 0; i < ndigits; i++) { + PyObject *item = PyList_GET_ITEM(list, i); + + long num = PyLong_AsLong(item); + if (num == -1 && PyErr_Occurred()) { + goto error; + } + + if (num < 0 || num >= PyLong_BASE) { + PyErr_SetString(PyExc_ValueError, "digit doesn't fit into digit"); + goto error; + } + digits[i] = (digit)num; + } + + void *writer_digits; + PyLongWriter *writer = PyLongWriter_Create(negative, ndigits, + &writer_digits); + if (writer == NULL) { + goto error; + } + assert(PyLong_GetNativeLayout()->digit_size == sizeof(digit)); + memcpy(writer_digits, digits, (size_t)ndigits * sizeof(digit)); + PyObject *res = PyLongWriter_Finish(writer); + PyMem_Free(digits); + + return res; + +error: + PyMem_Free(digits); + return NULL; +} + + +static PyObject * +get_pylong_layout(PyObject *module, PyObject *Py_UNUSED(args)) +{ + const PyLongLayout *layout = PyLong_GetNativeLayout(); + return layout_to_dict(layout); +} + + static PyMethodDef test_methods[] = { _TESTCAPI_CALL_LONG_COMPACT_API_METHODDEF {"pylong_fromunicodeobject", pylong_fromunicodeobject, METH_VARARGS}, @@ -148,6 +269,9 @@ static PyMethodDef test_methods[] = { {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {"pylong_getsign", pylong_getsign, METH_O}, {"pylong_aspid", pylong_aspid, METH_O}, + {"pylong_export", pylong_export, METH_O}, + {"pylongwriter_create", pylongwriter_create, METH_VARARGS}, + {"get_pylong_layout", get_pylong_layout, METH_NOARGS}, {"pylong_ispositive", pylong_ispositive, METH_O}, {"pylong_isnegative", pylong_isnegative, METH_O}, {"pylong_iszero", pylong_iszero, METH_O}, diff --git a/Objects/longobject.c b/Objects/longobject.c index 96d59f542a7c3c..bd7ff68d0899c6 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6750,6 +6750,7 @@ PyUnstable_Long_CompactValue(const PyLongObject* op) { return _PyLong_CompactValue((PyLongObject*)op); } + PyObject* PyLong_FromInt32(int32_t value) { return PyLong_FromNativeBytes(&value, sizeof(value), -1); } @@ -6815,3 +6816,122 @@ int PyLong_AsUInt64(PyObject *obj, uint64_t *value) { LONG_TO_UINT(obj, value, "C uint64_t"); } + + +static const PyLongLayout PyLong_LAYOUT = { + .bits_per_digit = PyLong_SHIFT, + .digits_order = -1, // least significant first + .digit_endianness = PY_LITTLE_ENDIAN ? -1 : 1, + .digit_size = sizeof(digit), +}; + + +const PyLongLayout* +PyLong_GetNativeLayout(void) +{ + return &PyLong_LAYOUT; +} + + +int +PyLong_Export(PyObject *obj, PyLongExport *export_long) +{ + if (!PyLong_Check(obj)) { + memset(export_long, 0, sizeof(*export_long)); + PyErr_Format(PyExc_TypeError, "expect int, got %T", obj); + return -1; + } + + // Fast-path: try to convert to a int64_t + int overflow; +#if SIZEOF_LONG == 8 + long value = PyLong_AsLongAndOverflow(obj, &overflow); +#else + // Windows has 32-bit long, so use 64-bit long long instead + long long value = PyLong_AsLongLongAndOverflow(obj, &overflow); +#endif + Py_BUILD_ASSERT(sizeof(value) == sizeof(int64_t)); + // the function cannot fail since obj is a PyLongObject + assert(!(value == -1 && PyErr_Occurred())); + + if (!overflow) { + export_long->value = value; + export_long->negative = 0; + export_long->ndigits = 0; + export_long->digits = NULL; + export_long->_reserved = 0; + } + else { + PyLongObject *self = (PyLongObject*)obj; + export_long->value = 0; + export_long->negative = _PyLong_IsNegative(self); + export_long->ndigits = _PyLong_DigitCount(self); + if (export_long->ndigits == 0) { + export_long->ndigits = 1; + } + export_long->digits = self->long_value.ob_digit; + export_long->_reserved = (Py_uintptr_t)Py_NewRef(obj); + } + return 0; +} + + +void +PyLong_FreeExport(PyLongExport *export_long) +{ + PyObject *obj = (PyObject*)export_long->_reserved; + if (obj) { + export_long->_reserved = 0; + Py_DECREF(obj); + } +} + + +/* --- PyLongWriter API --------------------------------------------------- */ + +PyLongWriter* +PyLongWriter_Create(int negative, Py_ssize_t ndigits, void **digits) +{ + if (ndigits <= 0) { + PyErr_SetString(PyExc_ValueError, "ndigits must be positive"); + goto error; + } + assert(digits != NULL); + + PyLongObject *obj = _PyLong_New(ndigits); + if (obj == NULL) { + goto error; + } + if (negative) { + _PyLong_FlipSign(obj); + } + + *digits = obj->long_value.ob_digit; + return (PyLongWriter*)obj; + +error: + *digits = NULL; + return NULL; +} + + +void +PyLongWriter_Discard(PyLongWriter *writer) +{ + PyLongObject *obj = (PyLongObject *)writer; + assert(Py_REFCNT(obj) == 1); + Py_DECREF(obj); +} + + +PyObject* +PyLongWriter_Finish(PyLongWriter *writer) +{ + PyLongObject *obj = (PyLongObject *)writer; + assert(Py_REFCNT(obj) == 1); + + // Normalize and get singleton if possible + obj = maybe_small_long(long_normalize(obj)); + + return (PyObject*)obj; +} diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 686f3935d91bda..c8c30a7985aa2e 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -319,6 +319,7 @@ Objects/exceptions.c - static_exceptions - Objects/genobject.c - ASYNC_GEN_IGNORED_EXIT_MSG - Objects/genobject.c - NON_INIT_CORO_MSG - Objects/longobject.c - _PyLong_DigitValue - +Objects/longobject.c - PyLong_LAYOUT - Objects/object.c - _Py_SwappedOp - Objects/object.c - _Py_abstract_hack - Objects/object.c - last_final_reftotal - From 8bc18182a7c28f86265c9d82bd0338137480921c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:16:22 +0100 Subject: [PATCH 18/19] gh-127691: add type checks when using `PyUnicodeError` objects (GH-127694) --- Doc/whatsnew/3.14.rst | 6 + ...-12-06-16-53-34.gh-issue-127691.k_Jitp.rst | 3 + Objects/exceptions.c | 216 ++++++++++++------ 3 files changed, 157 insertions(+), 68 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5ce398ab93d6b4..095949242c09d9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1045,6 +1045,12 @@ New features * Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling deferred reference counting, as outlined in :pep:`703`. +* The :ref:`Unicode Exception Objects ` C API + now raises a :exc:`TypeError` if its exception argument is not + a :exc:`UnicodeError` object. + (Contributed by Bénédikt Tran in :gh:`127691`.) + + Porting to Python 3.14 ---------------------- diff --git a/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst new file mode 100644 index 00000000000000..c942ff3d9eda53 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst @@ -0,0 +1,3 @@ +The :ref:`Unicode Exception Objects ` C API +now raises a :exc:`TypeError` if its exception argument is not +a :exc:`UnicodeError` object. Patch by Bénédikt Tran. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 287cbc25305964..6880c24196cbb8 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2668,7 +2668,7 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); static PyObject * -get_string(PyObject *attr, const char *name) +get_bytes(PyObject *attr, const char *name) { if (!attr) { PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); @@ -2748,40 +2748,74 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) return end; } +#define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR)) +#define PyUnicodeError_Check(PTR) \ + PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError) +#define PyUnicodeError_CAST(PTR) \ + (assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR)) + + +static inline int +check_unicode_error_type(PyObject *self, const char *expect_type) +{ + if (!PyUnicodeError_Check(self)) { + PyErr_Format(PyExc_TypeError, + "expecting a %s object, got %T", expect_type, self); + return -1; + } + return 0; +} + + +static inline PyUnicodeErrorObject * +as_unicode_error(PyObject *self, const char *expect_type) +{ + int rc = check_unicode_error_type(self, expect_type); + return rc < 0 ? NULL : _PyUnicodeError_CAST(self); +} + PyObject * -PyUnicodeEncodeError_GetEncoding(PyObject *exc) +PyUnicodeEncodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); } PyObject * -PyUnicodeDecodeError_GetEncoding(PyObject *exc) +PyUnicodeDecodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); } PyObject * -PyUnicodeEncodeError_GetObject(PyObject *exc) +PyUnicodeEncodeError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->object, "object"); } PyObject * -PyUnicodeDecodeError_GetObject(PyObject *exc) +PyUnicodeDecodeError_GetObject(PyObject *self) { - return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_bytes(exc->object, "object"); } PyObject * -PyUnicodeTranslateError_GetObject(PyObject *exc) +PyUnicodeTranslateError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? NULL : get_unicode(exc->object, "object"); } int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + if (exc == NULL) { + return -1; + } PyObject *obj = get_unicode(exc->object, "object"); if (obj == NULL) { return -1; @@ -2796,8 +2830,11 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_bytes(exc->object, "object"); if (obj == NULL) { return -1; } @@ -2809,45 +2846,63 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) int -PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - return PyUnicodeEncodeError_GetStart(exc, start); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { + return -1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); + return 0; } static inline int unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) { - ((PyUnicodeErrorObject *)self)->start = start; + PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); + exc->start = start; return 0; } int -PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int -PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int -PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + if (exc == NULL) { + return -1; + } PyObject *obj = get_unicode(exc->object, "object"); if (obj == NULL) { return -1; @@ -2862,8 +2917,11 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_bytes(exc->object, "object"); if (obj == NULL) { return -1; } @@ -2875,108 +2933,130 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) int -PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - return PyUnicodeEncodeError_GetEnd(exc, end); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { + return -1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); + return 0; } static inline int -unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) +unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; + PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); + exc->end = end; return 0; } int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } int -PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } int -PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } + PyObject * -PyUnicodeEncodeError_GetReason(PyObject *exc) +PyUnicodeEncodeError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } PyObject * -PyUnicodeDecodeError_GetReason(PyObject *exc) +PyUnicodeDecodeError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } PyObject * -PyUnicodeTranslateError_GetReason(PyObject *exc) +PyUnicodeTranslateError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } int -PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } int -PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } int -PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason) +PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } static int -UnicodeError_clear(PyUnicodeErrorObject *self) +UnicodeError_clear(PyObject *self) { - Py_CLEAR(self->encoding); - Py_CLEAR(self->object); - Py_CLEAR(self->reason); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + Py_CLEAR(exc->encoding); + Py_CLEAR(exc->object); + Py_CLEAR(exc->reason); return BaseException_clear((PyBaseExceptionObject *)self); } static void -UnicodeError_dealloc(PyUnicodeErrorObject *self) +UnicodeError_dealloc(PyObject *self) { + PyTypeObject *type = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - UnicodeError_clear(self); - Py_TYPE(self)->tp_free((PyObject *)self); + (void)UnicodeError_clear(self); + type->tp_free(self); } static int -UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg) +UnicodeError_traverse(PyObject *self, visitproc visit, void *arg) { - Py_VISIT(self->encoding); - Py_VISIT(self->object); - Py_VISIT(self->reason); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + Py_VISIT(exc->encoding); + Py_VISIT(exc->object); + Py_VISIT(exc->reason); return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); } @@ -3015,7 +3095,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; @@ -3027,7 +3107,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeEncodeError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; PyObject *encoding_str = NULL; @@ -3135,7 +3215,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) } } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->object, object /* already a strong reference */); exc->start = start; @@ -3147,7 +3227,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeDecodeError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; PyObject *encoding_str = NULL; @@ -3236,7 +3316,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; exc->end = end; @@ -3248,7 +3328,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeTranslateError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; From 5dd775bed086909722ec7014a7c4f77a35f74a80 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 14 Dec 2024 01:21:46 +0900 Subject: [PATCH 19/19] gh-126024: unicodeobject: optimize find_first_nonascii (GH-127790) Remove 1 branch. --- Objects/unicodeobject.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 33c4747bbef488..b7aeb06d32bcec 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5077,21 +5077,24 @@ load_unaligned(const unsigned char *p, size_t size) static Py_ssize_t find_first_nonascii(const unsigned char *start, const unsigned char *end) { + // The search is done in `size_t` chunks. + // The start and end might not be aligned at `size_t` boundaries, + // so they're handled specially. + const unsigned char *p = start; if (end - start >= SIZEOF_SIZE_T) { - const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); + // Avoid unaligned read. #if PY_LITTLE_ENDIAN && HAVE_CTZ - if (p < p2) { - size_t u; - memcpy(&u, p, sizeof(size_t)); - u &= ASCII_CHAR_MASK; - if (u) { - return (ctz(u) - 7) / 8; - } - p = p2; + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; + if (u) { + return (ctz(u) - 7) / 8; } + p = _Py_ALIGN_DOWN(p + SIZEOF_SIZE_T, SIZEOF_SIZE_T); #else /* PY_LITTLE_ENDIAN && HAVE_CTZ */ + const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); while (p < p2) { if (*p & 0x80) { return p - start; @@ -5099,6 +5102,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) p++; } #endif + const unsigned char *e = end - SIZEOF_SIZE_T; while (p <= e) { size_t u = (*(const size_t *)p) & ASCII_CHAR_MASK; @@ -5115,6 +5119,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) } } #if PY_LITTLE_ENDIAN && HAVE_CTZ + assert((end - p) < SIZEOF_SIZE_T); // we can not use *(const size_t*)p to avoid buffer overrun. size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK; if (u) {