From 8dbdbad6e085564ec20109fc8277fa54b3fcc2db Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Tue, 10 Dec 2024 19:22:37 +0900 Subject: [PATCH 01/46] gh-101100: Fix sphinx warnings in `whatsnew/3.0.rst` (#127662) --- Doc/library/xmlrpc.rst | 3 ++ Doc/tools/.nitignore | 1 - Doc/whatsnew/3.0.rst | 112 ++++++++++++++++++++--------------------- 3 files changed, 59 insertions(+), 57 deletions(-) diff --git a/Doc/library/xmlrpc.rst b/Doc/library/xmlrpc.rst index 5f0a2cf68d01f9..a93d08f78cfba7 100644 --- a/Doc/library/xmlrpc.rst +++ b/Doc/library/xmlrpc.rst @@ -1,6 +1,9 @@ :mod:`!xmlrpc` --- XMLRPC server and client modules =================================================== +.. module:: xmlrpc + :synopsis: Server and client modules implementing XML-RPC. + XML-RPC is a Remote Procedure Call method that uses XML passed via HTTP as a transport. With it, a client can call methods with parameters on a remote server (the server is named by a URI) and get back structured data. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 39d1f5975e331c..7d50aec56a9bf7 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -76,7 +76,6 @@ Doc/whatsnew/2.4.rst Doc/whatsnew/2.5.rst Doc/whatsnew/2.6.rst Doc/whatsnew/2.7.rst -Doc/whatsnew/3.0.rst Doc/whatsnew/3.3.rst Doc/whatsnew/3.4.rst Doc/whatsnew/3.5.rst diff --git a/Doc/whatsnew/3.0.rst b/Doc/whatsnew/3.0.rst index d97f5fdd9eaa4a..6e1fda22ed203a 100644 --- a/Doc/whatsnew/3.0.rst +++ b/Doc/whatsnew/3.0.rst @@ -150,8 +150,8 @@ Some well-known APIs no longer return lists: sorted(d)`` instead (this works in Python 2.5 too and is just as efficient). -* Also, the :meth:`dict.iterkeys`, :meth:`dict.iteritems` and - :meth:`dict.itervalues` methods are no longer supported. +* Also, the :meth:`!dict.iterkeys`, :meth:`!dict.iteritems` and + :meth:`!dict.itervalues` methods are no longer supported. * :func:`map` and :func:`filter` return iterators. If you really need a list and the input sequences are all of equal length, a quick @@ -170,7 +170,7 @@ Some well-known APIs no longer return lists: :func:`itertools.zip_longest`, e.g. ``map(func, *sequences)`` becomes ``list(map(func, itertools.zip_longest(*sequences)))``. -* :func:`range` now behaves like :func:`xrange` used to behave, except +* :func:`range` now behaves like :func:`!xrange` used to behave, except it works with values of arbitrary size. The latter no longer exists. @@ -192,33 +192,33 @@ Python 3.0 has simplified the rules for ordering comparisons: operators: objects of different incomparable types always compare unequal to each other. -* :meth:`builtin.sorted` and :meth:`list.sort` no longer accept the +* :meth:`sorted` and :meth:`list.sort` no longer accept the *cmp* argument providing a comparison function. Use the *key* argument instead. N.B. the *key* and *reverse* arguments are now "keyword-only". -* The :func:`cmp` function should be treated as gone, and the :meth:`__cmp__` - special method is no longer supported. Use :meth:`__lt__` for sorting, - :meth:`__eq__` with :meth:`__hash__`, and other rich comparisons as needed. - (If you really need the :func:`cmp` functionality, you could use the +* The :func:`!cmp` function should be treated as gone, and the :meth:`!__cmp__` + special method is no longer supported. Use :meth:`~object.__lt__` for sorting, + :meth:`~object.__eq__` with :meth:`~object.__hash__`, and other rich comparisons as needed. + (If you really need the :func:`!cmp` functionality, you could use the expression ``(a > b) - (a < b)`` as the equivalent for ``cmp(a, b)``.) 
Integers -------- -* :pep:`237`: Essentially, :class:`long` renamed to :class:`int`. +* :pep:`237`: Essentially, :class:`!long` renamed to :class:`int`. That is, there is only one built-in integral type, named - :class:`int`; but it behaves mostly like the old :class:`long` type. + :class:`int`; but it behaves mostly like the old :class:`!long` type. * :pep:`238`: An expression like ``1/2`` returns a float. Use ``1//2`` to get the truncating behavior. (The latter syntax has existed for years, at least since Python 2.2.) -* The :data:`sys.maxint` constant was removed, since there is no +* The :data:`!sys.maxint` constant was removed, since there is no longer a limit to the value of integers. However, :data:`sys.maxsize` can be used as an integer larger than any practical list or string index. It conforms to the implementation's "natural" integer size - and is typically the same as :data:`sys.maxint` in previous releases + and is typically the same as :data:`!sys.maxint` in previous releases on the same platform (assuming the same build options). * The :func:`repr` of a long integer doesn't include the trailing ``L`` @@ -251,7 +251,7 @@ changed. that uses Unicode, encodings or binary data most likely has to change. The change is for the better, as in the 2.x world there were numerous bugs having to do with mixing encoded and unencoded - text. To be prepared in Python 2.x, start using :class:`unicode` + text. To be prepared in Python 2.x, start using :class:`!unicode` for all unencoded text, and :class:`str` for binary or encoded data only. Then the ``2to3`` tool will do most of the work for you. @@ -269,7 +269,7 @@ changed. separate *mutable* type to hold buffered binary data, :class:`bytearray`. Nearly all APIs that accept :class:`bytes` also accept :class:`bytearray`. The mutable API is based on - :class:`collections.MutableSequence`. + :class:`collections.MutableSequence `. * All backslashes in raw string literals are interpreted literally. This means that ``'\U'`` and ``'\u'`` escapes in raw strings are not @@ -278,11 +278,11 @@ changed. single "euro" character. (Of course, this change only affects raw string literals; the euro character is ``'\u20ac'`` in Python 3.0.) -* The built-in :class:`basestring` abstract type was removed. Use +* The built-in :class:`!basestring` abstract type was removed. Use :class:`str` instead. The :class:`str` and :class:`bytes` types don't have functionality enough in common to warrant a shared base class. The ``2to3`` tool (see below) replaces every occurrence of - :class:`basestring` with :class:`str`. + :class:`!basestring` with :class:`str`. * Files opened as text files (still the default mode for :func:`open`) always use an encoding to map between strings (in memory) and bytes @@ -428,7 +428,7 @@ Changed Syntax class C(metaclass=M): ... - The module-global :data:`__metaclass__` variable is no longer + The module-global :data:`!__metaclass__` variable is no longer supported. (It was a crutch to make it easier to default to new-style classes without deriving every class from :class:`object`.) @@ -522,19 +522,19 @@ consulted for longer descriptions. *encoding*, *errors*, *newline* and *closefd*. Also note that an invalid *mode* argument now raises :exc:`ValueError`, not :exc:`IOError`. 
The binary file object underlying a text file - object can be accessed as :attr:`f.buffer` (but beware that the + object can be accessed as :attr:`!f.buffer` (but beware that the text object maintains a buffer of itself in order to speed up the encoding and decoding operations). -* :ref:`pep-3118`. The old builtin :func:`buffer` is now really gone; +* :ref:`pep-3118`. The old builtin :func:`!buffer` is now really gone; the new builtin :func:`memoryview` provides (mostly) similar functionality. * :ref:`pep-3119`. The :mod:`abc` module and the ABCs defined in the :mod:`collections` module plays a somewhat more prominent role in the language now, and built-in collection types like :class:`dict` - and :class:`list` conform to the :class:`collections.MutableMapping` - and :class:`collections.MutableSequence` ABCs, respectively. + and :class:`list` conform to the :class:`collections.MutableMapping ` + and :class:`collections.MutableSequence ` ABCs, respectively. * :ref:`pep-3127`. As mentioned above, the new octal literal notation is the only one supported, and binary literals have been @@ -612,7 +612,7 @@ review: :mod:`!CGIHTTPServer`, :mod:`!SimpleHTTPServer`, :mod:`!Cookie`, :mod:`!cookielib`). - * :mod:`tkinter` (all :mod:`Tkinter`-related modules except + * :mod:`tkinter` (all ``Tkinter``-related modules except :mod:`turtle`). The target audience of :mod:`turtle` doesn't really care about :mod:`tkinter`. Also note that as of Python 2.6, the functionality of :mod:`turtle` has been greatly enhanced. @@ -628,47 +628,47 @@ Some other changes to standard library modules, not covered by * Killed :mod:`!sets`. Use the built-in :func:`set` class. -* Cleanup of the :mod:`sys` module: removed :func:`sys.exitfunc`, - :func:`sys.exc_clear`, :data:`sys.exc_type`, :data:`sys.exc_value`, - :data:`sys.exc_traceback`. (Note that :data:`sys.last_type` +* Cleanup of the :mod:`sys` module: removed :func:`!sys.exitfunc`, + :func:`!sys.exc_clear`, :data:`!sys.exc_type`, :data:`!sys.exc_value`, + :data:`!sys.exc_traceback`. (Note that :data:`sys.last_type` etc. remain.) -* Cleanup of the :class:`array.array` type: the :meth:`read` and - :meth:`write` methods are gone; use :meth:`fromfile` and - :meth:`tofile` instead. Also, the ``'c'`` typecode for array is +* Cleanup of the :class:`array.array` type: the :meth:`!read` and + :meth:`!write` methods are gone; use :meth:`~array.array.fromfile` and + :meth:`~array.array.tofile` instead. Also, the ``'c'`` typecode for array is gone -- use either ``'b'`` for bytes or ``'u'`` for Unicode characters. * Cleanup of the :mod:`operator` module: removed - :func:`sequenceIncludes` and :func:`isCallable`. + :func:`!sequenceIncludes` and :func:`!isCallable`. * Cleanup of the :mod:`!thread` module: :func:`!acquire_lock` and :func:`!release_lock` are gone; use :meth:`~threading.Lock.acquire` and :meth:`~threading.Lock.release` instead. -* Cleanup of the :mod:`random` module: removed the :func:`jumpahead` API. +* Cleanup of the :mod:`random` module: removed the :func:`!jumpahead` API. * The :mod:`!new` module is gone. -* The functions :func:`os.tmpnam`, :func:`os.tempnam` and - :func:`os.tmpfile` have been removed in favor of the :mod:`tempfile` +* The functions :func:`!os.tmpnam`, :func:`!os.tempnam` and + :func:`!os.tmpfile` have been removed in favor of the :mod:`tempfile` module. * The :mod:`tokenize` module has been changed to work with bytes. The main entry point is now :func:`tokenize.tokenize`, instead of generate_tokens. 
-* :data:`string.letters` and its friends (:data:`string.lowercase` and - :data:`string.uppercase`) are gone. Use +* :data:`!string.letters` and its friends (:data:`!string.lowercase` and + :data:`!string.uppercase`) are gone. Use :data:`string.ascii_letters` etc. instead. (The reason for the - removal is that :data:`string.letters` and friends had + removal is that :data:`!string.letters` and friends had locale-specific behavior, which is a bad idea for such attractively named global "constants".) -* Renamed module :mod:`__builtin__` to :mod:`builtins` (removing the - underscores, adding an 's'). The :data:`__builtins__` variable +* Renamed module :mod:`!__builtin__` to :mod:`builtins` (removing the + underscores, adding an 's'). The :data:`!__builtins__` variable found in most global namespaces is unchanged. To modify a builtin, - you should use :mod:`builtins`, not :data:`__builtins__`! + you should use :mod:`builtins`, not :data:`!__builtins__`! :pep:`3101`: A New Approach To String Formatting @@ -702,9 +702,9 @@ new powerful features added: idiom for handling all exceptions except for this latter category is to use :keyword:`except` :exc:`Exception`. -* :exc:`StandardError` was removed. +* :exc:`!StandardError` was removed. -* Exceptions no longer behave as sequences. Use the :attr:`args` +* Exceptions no longer behave as sequences. Use the :attr:`~BaseException.args` attribute instead. * :pep:`3109`: Raising exceptions. You must now use :samp:`raise @@ -765,20 +765,20 @@ Operators And Special Methods When referencing a method as a class attribute, you now get a plain function object. -* :meth:`__getslice__`, :meth:`__setslice__` and :meth:`__delslice__` +* :meth:`!__getslice__`, :meth:`!__setslice__` and :meth:`!__delslice__` were killed. The syntax ``a[i:j]`` now translates to - ``a.__getitem__(slice(i, j))`` (or :meth:`__setitem__` or - :meth:`__delitem__`, when used as an assignment or deletion target, + ``a.__getitem__(slice(i, j))`` (or :meth:`~object.__setitem__` or + :meth:`~object.__delitem__`, when used as an assignment or deletion target, respectively). * :pep:`3114`: the standard :meth:`next` method has been renamed to :meth:`~iterator.__next__`. -* The :meth:`__oct__` and :meth:`__hex__` special methods are removed - -- :func:`oct` and :func:`hex` use :meth:`__index__` now to convert +* The :meth:`!__oct__` and :meth:`!__hex__` special methods are removed + -- :func:`oct` and :func:`hex` use :meth:`~object.__index__` now to convert the argument to an integer. -* Removed support for :attr:`__members__` and :attr:`__methods__`. +* Removed support for :attr:`!__members__` and :attr:`!__methods__`. * The function attributes named :attr:`!func_X` have been renamed to use the :attr:`!__X__` form, freeing up these names in the function @@ -802,7 +802,7 @@ Builtins instance will automatically be chosen. With arguments, the behavior of :func:`super` is unchanged. -* :pep:`3111`: :func:`raw_input` was renamed to :func:`input`. That +* :pep:`3111`: :func:`!raw_input` was renamed to :func:`input`. That is, the new :func:`input` function reads a line from :data:`sys.stdin` and returns it with the trailing newline stripped. It raises :exc:`EOFError` if the input is terminated prematurely. @@ -820,31 +820,31 @@ Builtins argument and a value of the same type as ``x`` when called with two arguments. -* Moved :func:`intern` to :func:`sys.intern`. +* Moved :func:`!intern` to :func:`sys.intern`. -* Removed: :func:`apply`. Instead of ``apply(f, args)`` use +* Removed: :func:`!apply`. 
Instead of ``apply(f, args)`` use ``f(*args)``. * Removed :func:`callable`. Instead of ``callable(f)`` you can use - ``isinstance(f, collections.Callable)``. The :func:`operator.isCallable` + ``isinstance(f, collections.Callable)``. The :func:`!operator.isCallable` function is also gone. -* Removed :func:`coerce`. This function no longer serves a purpose +* Removed :func:`!coerce`. This function no longer serves a purpose now that classic classes are gone. -* Removed :func:`execfile`. Instead of ``execfile(fn)`` use +* Removed :func:`!execfile`. Instead of ``execfile(fn)`` use ``exec(open(fn).read())``. -* Removed the :class:`file` type. Use :func:`open`. There are now several +* Removed the :class:`!file` type. Use :func:`open`. There are now several different kinds of streams that open can return in the :mod:`io` module. -* Removed :func:`reduce`. Use :func:`functools.reduce` if you really +* Removed :func:`!reduce`. Use :func:`functools.reduce` if you really need it; however, 99 percent of the time an explicit :keyword:`for` loop is more readable. -* Removed :func:`reload`. Use :func:`!imp.reload`. +* Removed :func:`!reload`. Use :func:`!imp.reload`. -* Removed. :meth:`dict.has_key` -- use the :keyword:`in` operator +* Removed. :meth:`!dict.has_key` -- use the :keyword:`in` operator instead. .. ====================================================================== From ae31df354d02e12bf656954c5c72380d96c1dc0e Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 10 Dec 2024 12:51:12 +0200 Subject: [PATCH 02/46] Add zizmor to pre-commit and fix most findings (#127749) Co-authored-by: Alex Waygood --- .github/workflows/build.yml | 11 ++++++++++- .github/workflows/documentation-links.yml | 6 +++--- .github/workflows/jit.yml | 6 ++++++ .github/workflows/lint.yml | 2 ++ .github/workflows/mypy.yml | 2 ++ .github/workflows/require-pr-label.yml | 10 ++++++---- .github/workflows/reusable-change-detection.yml | 2 ++ .github/workflows/reusable-docs.yml | 14 ++++++++++---- .github/workflows/reusable-macos.yml | 2 ++ .github/workflows/reusable-tsan.yml | 9 +++++++-- .github/workflows/reusable-ubuntu.yml | 4 +++- .github/workflows/reusable-wasi.yml | 12 +++++++----- .github/workflows/reusable-windows-msi.yml | 5 ++++- .github/workflows/reusable-windows.yml | 10 ++++++++-- .github/workflows/stale.yml | 5 ++--- .github/workflows/verify-ensurepip-wheels.yml | 2 ++ .github/zizmor.yml | 6 ++++++ .pre-commit-config.yaml | 9 +++++++-- 18 files changed, 89 insertions(+), 28 deletions(-) create mode 100644 .github/zizmor.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b2f19fd6bcf54..8787402ccc4423 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -58,6 +58,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 1 + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Check Autoconf and aclocal versions @@ -94,6 +95,8 @@ jobs: if: needs.check_source.outputs.run_tests == 'true' steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3.x' @@ -268,6 +271,8 @@ jobs: LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache @@ -328,6 +333,8 @@ jobs: 
PYTHONSTRICTEXTENSIONBUILD: 1 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install Dependencies @@ -411,7 +418,7 @@ jobs: # # (GH-104097) test_sysconfig is skipped because it has tests that are # failing when executed from inside a virtual environment. - ${{ env.VENV_PYTHON }} -m test \ + "${VENV_PYTHON}" -m test \ -W \ -o \ -j4 \ @@ -446,6 +453,8 @@ jobs: ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache diff --git a/.github/workflows/documentation-links.yml b/.github/workflows/documentation-links.yml index 43a7afec73884e..fdb4b9aa29a7c8 100644 --- a/.github/workflows/documentation-links.yml +++ b/.github/workflows/documentation-links.yml @@ -10,9 +10,6 @@ on: - 'Doc/**' - '.github/workflows/doc.yml' -permissions: - pull-requests: write - concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true @@ -20,6 +17,9 @@ concurrency: jobs: documentation-links: runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: - uses: readthedocs/actions/preview@v1 with: diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index ee30cf5786d55b..9b84998a55666d 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -32,6 +32,8 @@ jobs: timeout-minutes: 90 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Build tier two interpreter run: | ./configure --enable-experimental-jit=interpreter --with-pydebug @@ -85,6 +87,8 @@ jobs: runner: ${{ github.repository_owner == 'python' && 'ubuntu-24.04-aarch64' || 'ubuntu-24.04' }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3.11' @@ -138,6 +142,8 @@ jobs: - 19 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3.11' diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ccde03f91983df..d74ce8fcc256dc 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,6 +20,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: "3.x" diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index e5b05302b5ac27..5dfa8d7bcafd78 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -51,6 +51,8 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: "3.13" diff --git a/.github/workflows/require-pr-label.yml b/.github/workflows/require-pr-label.yml index bbedd22cc6d189..0a6277c779ff67 100644 --- a/.github/workflows/require-pr-label.yml +++ b/.github/workflows/require-pr-label.yml @@ -4,15 +4,14 @@ on: pull_request: types: [opened, reopened, labeled, unlabeled, synchronize] -permissions: - issues: write - pull-requests: write - jobs: label-dnm: name: DO-NOT-MERGE if: github.repository_owner == 'python' runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write timeout-minutes: 10 steps: @@ -28,6 +27,9 @@ jobs: name: Unresolved review if: github.repository_owner == 'python' runs-on: ubuntu-latest + 
permissions: + issues: write + pull-requests: write timeout-minutes: 10 steps: diff --git a/.github/workflows/reusable-change-detection.yml b/.github/workflows/reusable-change-detection.yml index 1a6fd33186840c..964bd87e815f42 100644 --- a/.github/workflows/reusable-change-detection.yml +++ b/.github/workflows/reusable-change-detection.yml @@ -61,6 +61,8 @@ jobs: - run: >- echo '${{ github.event_name }}' - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Check for source changes id: check run: | diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 39a97392e898aa..3962d12403919a 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -22,12 +22,14 @@ jobs: env: branch_base: 'origin/${{ github.event.pull_request.base.ref }}' branch_pr: 'origin/${{ github.event.pull_request.head.ref }}' + commits: ${{ github.event.pull_request.commits }} refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}' refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}' steps: - name: 'Check out latest PR branch commit' uses: actions/checkout@v4 with: + persist-credentials: false ref: >- ${{ github.event_name == 'pull_request' @@ -39,15 +41,15 @@ jobs: if: github.event_name == 'pull_request' run: | # Fetch enough history to find a common ancestor commit (aka merge-base): - git fetch origin ${{ env.refspec_pr }} --depth=$(( ${{ github.event.pull_request.commits }} + 1 )) \ + git fetch origin "${refspec_pr}" --depth=$(( commits + 1 )) \ --no-tags --prune --no-recurse-submodules # This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from): - COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 ${{ env.branch_pr }} ) + COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 "${branch_pr}" ) DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" ) # Get all commits since that commit date from the base branch (eg: master or main): - git fetch origin ${{ env.refspec_base }} --shallow-since="${DATE}" \ + git fetch origin "${refspec_base}" --shallow-since="${DATE}" \ --no-tags --prune --no-recurse-submodules - name: 'Set up Python' uses: actions/setup-python@v5 @@ -69,7 +71,7 @@ jobs: if: github.event_name == 'pull_request' run: | python Doc/tools/check-warnings.py \ - --annotate-diff '${{ env.branch_base }}' '${{ env.branch_pr }}' \ + --annotate-diff "${branch_base}" "${branch_pr}" \ --fail-if-regression \ --fail-if-improved \ --fail-if-new-news-nit @@ -81,6 +83,8 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: 'Set up Python' uses: actions/setup-python@v5 with: @@ -99,6 +103,8 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/cache@v4 with: path: ~/.cache/pip diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 4c3dd10194f8cb..36ae3e27207e37 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -29,6 +29,8 @@ jobs: runs-on: ${{ inputs.os }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache diff --git a/.github/workflows/reusable-tsan.yml 
b/.github/workflows/reusable-tsan.yml index 7a4d81f0bdcad1..b5144ca3e9efc4 100644 --- a/.github/workflows/reusable-tsan.yml +++ b/.github/workflows/reusable-tsan.yml @@ -23,8 +23,13 @@ jobs: name: 'Thread sanitizer' runs-on: ubuntu-24.04 timeout-minutes: 60 + env: + OPTIONS: ${{ inputs.options }} + SUPPRESSIONS_PATH: ${{ inputs.suppressions_path }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache @@ -47,7 +52,7 @@ jobs: sudo sysctl -w vm.mmap_rnd_bits=28 - name: TSAN Option Setup run: | - echo "TSAN_OPTIONS=log_path=${GITHUB_WORKSPACE}/tsan_log suppressions=${GITHUB_WORKSPACE}/${{ inputs.suppressions_path }} handle_segv=0" >> "$GITHUB_ENV" + echo "TSAN_OPTIONS=log_path=${GITHUB_WORKSPACE}/tsan_log suppressions=${GITHUB_WORKSPACE}/${SUPPRESSIONS_PATH} handle_segv=0" >> "$GITHUB_ENV" echo "CC=clang" >> "$GITHUB_ENV" echo "CXX=clang++" >> "$GITHUB_ENV" - name: Add ccache to PATH @@ -59,7 +64,7 @@ jobs: save: ${{ github.event_name == 'push' }} max-size: "200M" - name: Configure CPython - run: ${{ inputs.options }} + run: "${OPTIONS}" - name: Build CPython run: make -j4 - name: Display build info diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 2869202c7910c9..46c542940c8483 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -28,6 +28,8 @@ jobs: TERM: linux steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install dependencies @@ -94,7 +96,7 @@ jobs: if: ${{ !inputs.free-threading }} run: >- python Tools/build/check_warnings.py - --compiler-output-file-path=${{ env.CPYTHON_BUILDDIR }}/compiler_output_ubuntu.txt + --compiler-output-file-path="${CPYTHON_BUILDDIR}/compiler_output_ubuntu.txt" --warning-ignore-file-path "${GITHUB_WORKSPACE}/Tools/build/.warningignore_ubuntu" --compiler-output-type=gcc --fail-on-regression diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 3f96c888e2dd30..4356d9c1c8795e 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -20,6 +20,8 @@ jobs: CROSS_BUILD_WASI: cross-build/wasm32-wasip1 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false # No problem resolver registered as one doesn't currently exist for Clang. - name: "Install wasmtime" uses: bytecodealliance/actions/wasmtime/setup@v1 @@ -34,9 +36,9 @@ jobs: - name: "Install WASI SDK" # Hard-coded to x64. 
if: steps.cache-wasi-sdk.outputs.cache-hit != 'true' run: | - mkdir ${{ env.WASI_SDK_PATH }} && \ - curl -s -S --location https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${{ env.WASI_SDK_VERSION }}/wasi-sdk-${{ env.WASI_SDK_VERSION }}.0-x86_64-linux.tar.gz | \ - tar --strip-components 1 --directory ${{ env.WASI_SDK_PATH }} --extract --gunzip + mkdir "${WASI_SDK_PATH}" && \ + curl -s -S --location "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION}/wasi-sdk-${WASI_SDK_VERSION}.0-x86_64-linux.tar.gz" | \ + tar --strip-components 1 --directory "${WASI_SDK_PATH}" --extract --gunzip - name: "Configure ccache action" uses: hendrikmuhs/ccache-action@v1.2 with: @@ -72,6 +74,6 @@ jobs: - name: "Make host" run: python3 Tools/wasm/wasi.py make-host - name: "Display build info" - run: make --directory ${{ env.CROSS_BUILD_WASI }} pythoninfo + run: make --directory "${CROSS_BUILD_WASI}" pythoninfo - name: "Test" - run: make --directory ${{ env.CROSS_BUILD_WASI }} test + run: make --directory "${CROSS_BUILD_WASI}" test diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index abdb1a1982fef8..d0d53dba0b45d1 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -17,8 +17,11 @@ jobs: runs-on: windows-latest timeout-minutes: 60 env: + ARCH: ${{ inputs.arch }} IncludeFreethreaded: true steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Build CPython installer - run: .\Tools\msi\build.bat --doc -${{ inputs.arch }} + run: .\Tools\msi\build.bat --doc -"${ARCH}" diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index 12b68d68466d62..459d2b29e5d42b 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -26,8 +26,12 @@ jobs: name: 'build and test (${{ inputs.arch }})' runs-on: ${{ inputs.os }} timeout-minutes: 60 + env: + ARCH: ${{ inputs.arch }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Register MSVC problem matcher if: inputs.arch != 'Win32' run: echo "::add-matcher::.github/problem-matchers/msvc.json" @@ -35,8 +39,9 @@ jobs: run: >- .\\PCbuild\\build.bat -e -d -v - -p ${{ inputs.arch }} + -p "${ARCH}" ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} + shell: bash - name: Display build info # FIXME(diegorusso): remove the `if` if: inputs.arch != 'arm64' run: .\\python.bat -m test.pythoninfo @@ -44,6 +49,7 @@ jobs: if: inputs.arch != 'arm64' run: >- .\\PCbuild\\rt.bat - -p ${{ inputs.arch }} + -p "${ARCH}" -d -q --fast-ci ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} + shell: bash diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index f97587e68cbbe4..7578189f5d4d67 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -4,14 +4,13 @@ on: schedule: - cron: "0 */6 * * *" -permissions: - pull-requests: write - jobs: stale: if: github.repository_owner == 'python' runs-on: ubuntu-latest + permissions: + pull-requests: write timeout-minutes: 10 steps: diff --git a/.github/workflows/verify-ensurepip-wheels.yml b/.github/workflows/verify-ensurepip-wheels.yml index 83b007f1c9c2ef..463e7bf3355cc3 100644 --- a/.github/workflows/verify-ensurepip-wheels.yml +++ b/.github/workflows/verify-ensurepip-wheels.yml @@ -26,6 +26,8 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: 
actions/setup-python@v5 with: python-version: '3' diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 00000000000000..eeda8d9eaaf484 --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,6 @@ +# Configuration for the zizmor static analysis tool, run via pre-commit in CI +# https://woodruffw.github.io/zizmor/configuration/ +rules: + dangerous-triggers: + ignore: + - documentation-links.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccaf2390d99fae..107f3b255735f4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.8.2 hooks: - id: ruff name: Run Ruff (lint) on Doc/ @@ -51,7 +51,7 @@ repos: types_or: [c, inc, python, rst] - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.29.4 + rev: 0.30.0 hooks: - id: check-dependabot - id: check-github-workflows @@ -61,6 +61,11 @@ repos: hooks: - id: actionlint + - repo: https://github.com/woodruffw/zizmor-pre-commit + rev: v0.8.0 + hooks: + - id: zizmor + - repo: https://github.com/sphinx-contrib/sphinx-lint rev: v1.0.0 hooks: From 690fe077f6b1bf50e9d62078b22c334775efb3af Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 10 Dec 2024 11:53:56 +0100 Subject: [PATCH 03/46] gh-126491: Revert "GH-126491: Lower heap size limit with faster marking (GH-127519)" (GH-127770) Revert "GH-126491: Lower heap size limit with faster marking (GH-127519)" This reverts commit 023b7d2141467017abc27de864f3f44677768cb3, which introduced a refleak. --- InternalDocs/garbage_collector.md | 51 +---- Lib/test/test_gc.py | 14 +- Objects/dictobject.c | 4 +- Objects/genobject.c | 69 ++++++- Objects/typeobject.c | 13 -- Python/gc.c | 301 ++++++++++++++++-------------- 6 files changed, 243 insertions(+), 209 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 394e4ef075f55e..e4cb9e45c9e96a 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... def __init__(self, next_link=None): ... self.next_link = next_link -... +... >>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -515,45 +515,6 @@ increment. All objects directly referred to from those stack frames are added to the working set. Then the above algorithm is repeated, starting from step 2. -Determining how much work to do -------------------------------- - -We need to do a certain amount of work to ensure that garbage is collected, -but doing too much work slows down execution. - -To work out how much work we need to do, consider a heap with `L` live objects -and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects -at the end of the scavenge. 
We don't want the amount of garbage to grow, `G1 ≤ G0`, and -we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. -For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which -`G1` garbage objects are created. - -The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, -assuming that the number of live objects remains roughly constant. -If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` -For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio. - -In other words, to keep the garbage fraction to 1/3 or less we need to visit -4 times as many objects as are newly created. - -We can do better than this though. Not all new objects will be garbage. -Consider the heap at the end of the scavenge with `L1` live objects and `G1` -garbage. Also, note that `T == M + I` where `M` is the number of objects marked -as reachable and `I` is the number of objects visited in increments. -Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. - -If we choose the amount of work done such that `2*M + I == 6N` then we can do -less work in most cases, but are still guaranteed to keep up. -Since `I ≳ G0 + G1` (not strictly true, but close enough) -`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up. - -The reason that this improves performance is that `M` is usually much larger -than `I`. If `M == 10I`, then `T ≅ 3N`. - -Finally, instead of using a fixed multiple of 8, we gradually increase it as the -heap grows. This avoids wasting work for small heaps and during startup. - - Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index baf8e95dffdfce..b5140057a69d36 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1161,19 +1161,27 @@ def make_ll(depth): return head head = make_ll(1000) + count = 1000 + + # There will be some objects we aren't counting, + # e.g. the gc stats dicts. This test checks + # that the counts don't grow, so we try to + # correct for the uncounted objects + # This is just an estimate. + CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - iterations = max(20_000, initial_heap_size) - for i in range(iterations): + for i in range(20_000): newhead = make_ll(20) + count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, 27_000, f"Heap growing. 
Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index de518b8dc5024b..1c9f86438dadc3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7064,7 +7064,9 @@ int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyTypeObject *tp = Py_TYPE(obj); - assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); + if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { + return 0; + } if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); if (values->valid) { diff --git a/Objects/genobject.c b/Objects/genobject.c index 33679afecb420f..e87f199c2504ba 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -882,7 +882,25 @@ PyTypeObject PyGen_Type = { gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ - .tp_finalize = _PyGen_Finalize, + 0, /* tp_base */ + 0, /* tp_dict */ + + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + _PyGen_Finalize, /* tp_finalize */ }; static PyObject * @@ -1224,7 +1242,24 @@ PyTypeObject PyCoro_Type = { coro_methods, /* tp_methods */ coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ - .tp_finalize = _PyGen_Finalize, + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + _PyGen_Finalize, /* tp_finalize */ }; static void @@ -1429,6 +1464,7 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) + static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1637,7 +1673,24 @@ PyTypeObject PyAsyncGen_Type = { async_gen_methods, /* tp_methods */ async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ - .tp_finalize = _PyGen_Finalize, + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + _PyGen_Finalize, /* tp_finalize */ }; @@ -1882,6 +1935,16 @@ PyTypeObject _PyAsyncGenASend_Type = { PyObject_SelfIter, /* tp_iter */ async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ .tp_finalize = async_gen_asend_finalize, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index cc95b9857e3f2d..2068d6aa9be52b 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2355,16 +2355,6 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg) return 0; } - -static int -plain_object_traverse(PyObject *self, visitproc visit, void *arg) -{ - 
PyTypeObject *type = Py_TYPE(self); - assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); - Py_VISIT(type); - return PyObject_VisitManagedDict(self, visit, arg); -} - static void clear_slots(PyTypeObject *type, PyObject *self) { @@ -4157,9 +4147,6 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; - if (type->tp_basicsize == sizeof(PyObject)) { - type->tp_traverse = plain_object_traverse; - } } type->tp_basicsize = slotoffset; diff --git a/Python/gc.c b/Python/gc.c index fd29a48518e71b..5b9588c8741b97 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1277,13 +1277,18 @@ gc_list_set_space(PyGC_Head *list, int space) * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of - * new objects (N) to the heap, and incremental collectors - * scan I objects from the old space. - * I > N must be true. We also want I > N * K to be where - * K > 2. Higher values of K mean that the old space is + * objects, S (for survivors) to the old space, and + * incremental collectors scan I objects from the old space. + * I > S must be true. We also want I > S * N to be where + * N > 1. Higher values of N mean that the old space is * scanned more rapidly. + * The default incremental threshold of 10 translates to + * N == 1.4 (1 + 4/threshold) */ -#define SCAN_RATE_DIVISOR 5 + +/* Divide by 10, so that the default incremental threshold of 10 + * scans objects at 1% of the heap size */ +#define SCAN_RATE_DIVISOR 10 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1325,76 +1330,69 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -typedef struct work_stack { - PyGC_Head *top; - int visited_space; -} WorkStack; - -/* Remove gc from the list it is currently in and push it to the stack */ -static inline void -push_to_stack(PyGC_Head *gc, WorkStack *stack) -{ - PyGC_Head *prev = GC_PREV(gc); - PyGC_Head *next = GC_NEXT(gc); - _PyGCHead_SET_NEXT(prev, next); - _PyGCHead_SET_PREV(next, prev); - _PyGCHead_SET_PREV(gc, stack->top); - stack->top = gc; -} - -static inline PyGC_Head * -pop_from_stack(WorkStack *stack) +#ifndef NDEBUG +static inline int +IS_IN_VISITED(PyGC_Head *gc, int visited_space) { - PyGC_Head *gc = stack->top; - stack->top = _PyGCHead_PREV(gc); - return gc; + assert(visited_space == 0 || other_space(visited_space) == 0); + return gc_old_space(gc) == visited_space; } +#endif -/* append list `from` to `stack`; `from` becomes an empty list */ -static void -move_list_to_stack(PyGC_Head *from, WorkStack *stack) -{ - if (!gc_list_is_empty(from)) { - PyGC_Head *from_head = GC_NEXT(from); - PyGC_Head *from_tail = GC_PREV(from); - _PyGCHead_SET_PREV(from_head, stack->top); - stack->top = from_tail; - gc_list_init(from); - } -} +struct container_and_flag { + PyGC_Head *container; + int visited_space; + intptr_t size; +}; -static inline void -move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +/* A traversal callback for adding to container) */ +static int +visit_add_to_container(PyObject *op, void *arg) { - assert(op != NULL); - if (_PyObject_IS_GC(op)) { + OBJECT_STAT_INC(object_visits); + struct container_and_flag *cf = (struct container_and_flag *)arg; + int visited = cf->visited_space; + assert(visited == get_gc_state()->visited_space); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) 
&& - gc_old_space(gc) != visited_space) { - assert(!_Py_IsImmortal(op)); + gc_old_space(gc) != visited) { gc_flip_old_space(gc); - push_to_stack(gc, stack); + gc_list_move(gc, cf->container); + cf->size++; } } + return 0; } -static void -move_unvisited(PyObject *op, WorkStack *stack, int visited_space) -{ - move_to_stack(op, stack, visited_space); -} - -#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) - -/* A traversal callback for adding to container */ -static int -visit_add_to_container(PyObject *op, void *arg) -{ - OBJECT_STAT_INC(object_visits); - WorkStack *stack = (WorkStack *)arg; - assert(stack->visited_space == get_gc_state()->visited_space); - move_to_stack(op, stack, stack->visited_space); - return 0; +static intptr_t +expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) +{ + struct container_and_flag arg = { + .container = container, + .visited_space = gcstate->visited_space, + .size = 0 + }; + assert(GC_NEXT(gc) == container); + while (gc != container) { + /* Survivors will be moved to visited space, so they should + * have been marked as visited */ + assert(IS_IN_VISITED(gc, gcstate->visited_space)); + PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); + if (_Py_IsImmortal(op)) { + PyGC_Head *next = GC_NEXT(gc); + gc_list_move(gc, &get_gc_state()->permanent_generation.head); + gc = next; + continue; + } + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + gc = GC_NEXT(gc); + } + return arg.size; } /* Do bookkeeping for a completed GC cycle */ @@ -1422,62 +1420,54 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } assert(gc_list_is_empty(&gcstate->old[visited].head)); + gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static void -frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) -{ - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - if (frame->f_locals != NULL) { - move_unvisited(frame->f_locals, stack, visited_space); - } - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - move_unvisited(func, stack, visited_space); - while (sp > locals) { - sp--; - _PyStackRef ref = *sp; - if (!PyStackRef_IsNull(ref)) { - PyObject *op = PyStackRef_AsPyObjectBorrow(ref); - if (!_Py_IsImmortal(op)) { - move_unvisited(op, stack, visited_space); - } +static intptr_t +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; } } + return 0; } -static Py_ssize_t -move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) +static intptr_t +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - Py_ssize_t objects_marked = 0; - while (stack->top != NULL) { - PyGC_Head *gc = pop_from_stack(stack); + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); assert(gc_old_space(gc) == visited_space); - gc_list_append(gc, visited); - objects_marked++; + gc_list_move(gc, visited); PyObject *op = 
FROM_GC(gc); - assert(PyObject_IS_GC(op)); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - _PyObject_GC_UNTRACK(op); - } - else { - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, visit_add_to_container, stack); - } + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); } gc_list_validate_space(visited, visited_space); - return objects_marked; + return arg.size; } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - WorkStack stack; - stack.top = NULL; - stack.visited_space = visited_space; + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1490,7 +1480,27 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - frame_move_unvisited(frame, &stack, visited_space); + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1503,31 +1513,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); - assert(stack.top == NULL); + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - WorkStack stack; - stack.top = NULL; - stack.visited_space = visited_space; - MOVE_UNVISITED(interp->sysdict, &stack, visited_space); - MOVE_UNVISITED(interp->builtins, &stack, visited_space); - MOVE_UNVISITED(interp->dict, &stack, visited_space); + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); - MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - 
MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); - MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); } - Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); - assert(stack.top == NULL); + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); return objects_marked; } @@ -1539,35 +1549,39 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; validate_spaces(gcstate); return objects_marked; } - -/* See InternalDocs/garbage_collector.md for more details. */ -#define MAX_HEAP_PORTION_MULTIPLIER 5 -#define MARKING_PROGRESS_MULTIPLIER 2 - static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on two things. + /* The amount of work we want to do depends on three things. * 1. The number of new objects created - * 2. The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) + * 2. The growth in heap size since the last collection + * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. 
*/ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; - intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_portion > max_heap_portion) { - heap_portion = max_heap_portion; + intptr_t max_heap_fraction = new_objects*3/2; + intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > max_heap_fraction) { + heap_fraction = max_heap_fraction; } gcstate->young.count = 0; - return new_objects + heap_portion; + return new_objects + heap_fraction; } static void @@ -1580,37 +1594,36 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; + gcstate->work_to_do -= objects_marked; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + PyGC_Head increment; + gc_list_init(&increment); + int scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; + gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - PyGC_Head increment; - gc_list_init(&increment); - WorkStack working; - working.top = 0; - working.visited_space = gcstate->visited_space; - move_list_to_stack(&gcstate->young.head, &working); - Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + gc_list_merge(&gcstate->young.head, &increment); gc_list_validate_space(&increment, gcstate->visited_space); - assert(working.top == NULL); + Py_ssize_t increment_size = gc_list_size(&increment); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_set_old_space(gc, gcstate->visited_space); - push_to_stack(gc, &working); + gc_list_move(gc, &increment); + increment_size++; assert(!_Py_IsImmortal(FROM_GC(gc))); - increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); - assert(working.top == NULL); + gc_set_old_space(gc, gcstate->visited_space); + increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } - assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1619,6 +1632,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1654,7 +1668,6 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; 
completed_scavenge(gcstate); - gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From f4b31edf2d9d72878dab1f66a36913b5bcc848ec Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 10 Dec 2024 11:56:24 +0100 Subject: [PATCH 04/46] gh-127257: ssl: Raise OSError for ERR_LIB_SYS (GH-127361) From the ERR_raise manpage: ERR_LIB_SYS This "library code" indicates that a system error is being reported. In this case, the reason code given to `ERR_raise()` and `ERR_raise_data()` *must* be `errno(3)`. This PR only handles ERR_LIB_SYS for the high-lever error types SSL_ERROR_SYSCALL and SSL_ERROR_SSL, i.e., not the ones where OpenSSL indicates it has some more information about the issue. --- .../2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst | 2 ++ Modules/_ssl.c | 10 ++++++++++ 2 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst diff --git a/Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst b/Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst new file mode 100644 index 00000000000000..fb0380cba0b607 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst @@ -0,0 +1,2 @@ +In :mod:`ssl`, system call failures that OpenSSL reports using +``ERR_LIB_SYS`` are now raised as :exc:`OSError`. diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 59c414f9ce1ceb..e7df132869fee6 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -667,6 +667,11 @@ PySSL_SetError(PySSLSocket *sslsock, const char *filename, int lineno) ERR_GET_REASON(e) == SSL_R_CERTIFICATE_VERIFY_FAILED) { type = state->PySSLCertVerificationErrorObject; } + if (ERR_GET_LIB(e) == ERR_LIB_SYS) { + // A system error is being reported; reason is set to errno + errno = ERR_GET_REASON(e); + return PyErr_SetFromErrno(PyExc_OSError); + } p = PY_SSL_ERROR_SYSCALL; } break; @@ -692,6 +697,11 @@ PySSL_SetError(PySSLSocket *sslsock, const char *filename, int lineno) errstr = "EOF occurred in violation of protocol"; } #endif + if (ERR_GET_LIB(e) == ERR_LIB_SYS) { + // A system error is being reported; reason is set to errno + errno = ERR_GET_REASON(e); + return PyErr_SetFromErrno(PyExc_OSError); + } break; } default: From cef0a90d8f3a94aa534593f39b4abf98165675b9 Mon Sep 17 00:00:00 2001 From: Melissa0x1f992 <70096546+Melissa0x1f992@users.noreply.github.com> Date: Tue, 10 Dec 2024 06:13:11 -0600 Subject: [PATCH 05/46] gh-126937: ctypes: fix TypeError when a field's size is >65535 bytes (GH-126938) Co-authored-by: Peter Bierma Co-authored-by: Terry Jan Reedy Co-authored-by: Petr Viktorin --- Lib/test/test_ctypes/test_struct_fields.py | 23 +++++++++++++++++++ ...-11-17-21-35-55.gh-issue-126937.qluVM0.rst | 3 +++ Modules/_ctypes/cfield.c | 10 ++++++-- Modules/_ctypes/stgdict.c | 4 ++-- 4 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst diff --git a/Lib/test/test_ctypes/test_struct_fields.py b/Lib/test/test_ctypes/test_struct_fields.py index b5e165f3bae929..1b3e64efd410b8 100644 --- a/Lib/test/test_ctypes/test_struct_fields.py +++ b/Lib/test/test_ctypes/test_struct_fields.py @@ -1,4 +1,5 @@ import unittest +import sys from ctypes import Structure, Union, sizeof, c_char, c_int from ._support import (CField, Py_TPFLAGS_DISALLOW_INSTANTIATION, Py_TPFLAGS_IMMUTABLETYPE) @@ -75,6 +76,28 @@ def __init_subclass__(cls, **kwargs): 
'ctypes state is not initialized'): class Subclass(BrokenStructure): ... + def test_max_field_size_gh126937(self): + # Classes for big structs should be created successfully. + # (But they most likely can't be instantiated.) + # Here we test the exact limit: the number of *bits* must fit + # in Py_ssize_t. + + class X(self.cls): + _fields_ = [('char', c_char),] + max_field_size = sys.maxsize // 8 + + class Y(self.cls): + _fields_ = [('largeField', X * max_field_size)] + class Z(self.cls): + _fields_ = [('largeField', c_char * max_field_size)] + + with self.assertRaises(ValueError): + class TooBig(self.cls): + _fields_ = [('largeField', X * (max_field_size + 1))] + with self.assertRaises(ValueError): + class TooBig(self.cls): + _fields_ = [('largeField', c_char * (max_field_size + 1))] + # __set__ and __get__ should raise a TypeError in case their self # argument is not a ctype instance. def test___set__(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst new file mode 100644 index 00000000000000..8d7da0d4107021 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst @@ -0,0 +1,3 @@ +Fix :exc:`TypeError` when a :class:`ctypes.Structure` has a field size +that doesn't fit into an unsigned 16-bit integer. +Instead, the maximum number of *bits* is :data:`sys.maxsize`. diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 3220852c8398e0..2b9e8a1a10d6f5 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -110,10 +110,16 @@ PyCField_new_impl(PyTypeObject *type, PyObject *name, PyObject *proto, goto error; } - Py_ssize_t bit_size = NUM_BITS(size); - if (bit_size) { + if (bit_size_obj != Py_None) { +#ifdef Py_DEBUG + Py_ssize_t bit_size = NUM_BITS(size); assert(bit_size > 0); assert(bit_size <= info->size * 8); + // Currently, the bit size is specified redundantly + // in NUM_BITS(size) and bit_size_obj. + // Verify that they match. + assert(PyLong_AsSsize_t(bit_size_obj) == bit_size); +#endif switch(info->ffi_type_pointer.type) { case FFI_TYPE_UINT8: case FFI_TYPE_UINT16: diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index 5dbbe0b3285d58..5ca5b62427600d 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -292,7 +292,7 @@ PyCStructUnionType_update_stginfo(PyObject *type, PyObject *fields, int isStruct if (!tmp) { goto error; } - Py_ssize_t total_align = PyLong_AsInt(tmp); + Py_ssize_t total_align = PyLong_AsSsize_t(tmp); Py_DECREF(tmp); if (total_align < 0) { if (!PyErr_Occurred()) { @@ -306,7 +306,7 @@ PyCStructUnionType_update_stginfo(PyObject *type, PyObject *fields, int isStruct if (!tmp) { goto error; } - Py_ssize_t total_size = PyLong_AsInt(tmp); + Py_ssize_t total_size = PyLong_AsSsize_t(tmp); Py_DECREF(tmp); if (total_size < 0) { if (!PyErr_Occurred()) { From 9af96f440618304e7cc609c246e1f8c8b2d7a119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:58:17 +0100 Subject: [PATCH 06/46] gh-127563: use `dk_log2_index_bytes=3` in empty dicts (GH-127568) This fixes a UBSan failure (unaligned zero-size memcpy) in `dictobject.c`. 
--- Objects/dictobject.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 1c9f86438dadc3..05c93a3e448181 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -588,11 +588,14 @@ estimate_log2_keysize(Py_ssize_t n) /* This immutable, empty PyDictKeysObject is used for PyDict_Clear() * (which cannot fail and thus can do no allocation). + * + * See https://github.com/python/cpython/pull/127568#discussion_r1868070614 + * for the rationale of using dk_log2_index_bytes=3 instead of 0. */ static PyDictKeysObject empty_keys_struct = { _Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */ 0, /* dk_log2_size */ - 0, /* dk_log2_index_bytes */ + 3, /* dk_log2_index_bytes */ DICT_KEYS_UNICODE, /* dk_kind */ #ifdef Py_GIL_DISABLED {0}, /* dk_mutex */ From c91ccbe4ac0ec15c503521f539b3528db85871b4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 10 Dec 2024 17:33:11 +0100 Subject: [PATCH 07/46] gh-59705: Set OS thread name when Thread.name is changed (#127702) Co-authored-by: Petr Viktorin --- Doc/library/threading.rst | 12 ++++++++++++ Lib/test/test_threading.py | 19 +++++++++++++++++++ Lib/threading.py | 16 +++++++++++----- Modules/_threadmodule.c | 3 +-- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index d4b343db36efb3..f183f3f535c4cb 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -434,6 +434,18 @@ since it is impossible to detect the termination of alien threads. Multiple threads may be given the same name. The initial name is set by the constructor. + On some platforms, the thread name is set at the operating system level + when the thread starts, so that it is visible in task managers. + This name may be truncated to fit in a system-specific limit (for example, + 15 bytes on Linux or 63 bytes on macOS). + + Changes to *name* are only reflected at the OS level when the currently + running thread is renamed. (Setting the *name* attribute of a + different thread only updates the Python Thread object.) + + .. versionchanged:: 3.14 + Set the operating system thread name. + .. 
method:: getName() setName() diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index d05161f46f1034..3e164a12581dd1 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2164,6 +2164,25 @@ def work(): self.assertEqual(work_name, expected, f"{len(work_name)=} and {len(expected)=}") + @unittest.skipUnless(hasattr(_thread, 'set_name'), "missing _thread.set_name") + @unittest.skipUnless(hasattr(_thread, '_get_name'), "missing _thread._get_name") + def test_change_name(self): + # Change the name of a thread while the thread is running + + name1 = None + name2 = None + def work(): + nonlocal name1, name2 + name1 = _thread._get_name() + threading.current_thread().name = "new name" + name2 = _thread._get_name() + + thread = threading.Thread(target=work, name="name") + thread.start() + thread.join() + self.assertEqual(name1, "name") + self.assertEqual(name2, "new name") + class InterruptMainTests(unittest.TestCase): def check_interrupt_main_with_signal_handler(self, signum): diff --git a/Lib/threading.py b/Lib/threading.py index 3abd22a2aa1b72..78e591124278fc 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -1026,16 +1026,20 @@ def _set_ident(self): def _set_native_id(self): self._native_id = get_native_id() + def _set_os_name(self): + if _set_name is None or not self._name: + return + try: + _set_name(self._name) + except OSError: + pass + def _bootstrap_inner(self): try: self._set_ident() if _HAVE_THREAD_NATIVE_ID: self._set_native_id() - if _set_name is not None and self._name: - try: - _set_name(self._name) - except OSError: - pass + self._set_os_name() self._started.set() with _active_limbo_lock: _active[self._ident] = self @@ -1115,6 +1119,8 @@ def name(self): def name(self, name): assert self._initialized, "Thread.__init__() not called" self._name = str(name) + if get_ident() == self._ident: + self._set_os_name() @property def ident(self): diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 35c032fbeaa94f..75b34a8df7622c 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -2423,8 +2423,7 @@ _thread_set_name_impl(PyObject *module, PyObject *name_obj) #ifdef PYTHREAD_NAME_MAXLEN // Truncate to PYTHREAD_NAME_MAXLEN bytes + the NUL byte if needed - size_t len = PyBytes_GET_SIZE(name_encoded); - if (len > PYTHREAD_NAME_MAXLEN) { + if (PyBytes_GET_SIZE(name_encoded) > PYTHREAD_NAME_MAXLEN) { PyObject *truncated; truncated = PyBytes_FromStringAndSize(PyBytes_AS_STRING(name_encoded), PYTHREAD_NAME_MAXLEN); From 035f512046337e64a018d11fdaa3b21758625291 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Wed, 11 Dec 2024 02:35:00 +0900 Subject: [PATCH 08/46] Docs: Fix indents in `xmlrpc.client.rst` (#127782) --- Doc/library/xmlrpc.client.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index c57f433e6efd98..971e65605841e7 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -64,11 +64,11 @@ between conformable Python objects and XML on the wire. The obsolete *use_datetime* flag is similar to *use_builtin_types* but it applies only to date/time values. -.. versionchanged:: 3.3 - The *use_builtin_types* flag was added. + .. versionchanged:: 3.3 + The *use_builtin_types* flag was added. -.. versionchanged:: 3.8 - The *headers* parameter was added. + .. versionchanged:: 3.8 + The *headers* parameter was added. 
Both the HTTP and HTTPS transports support the URL syntax extension for HTTP Basic Authentication: ``http://user:pass@host:port/path``. The ``user:pass`` From 51216857ca8283f5b41c8cf9874238da56da4968 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Wed, 11 Dec 2024 05:32:04 +0800 Subject: [PATCH 09/46] gh-126821: Add versionadded annotation to use_system_logger feature. (#127755) Add versionadded annotation to use_system_logger feature. --- Doc/c-api/init_config.rst | 2 ++ iOS/README.rst | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 7497bf241fb10e..6b33d93a9f2af9 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -1290,6 +1290,8 @@ PyConfig Default: ``0`` (don't use system log). + .. versionadded:: 3.13.2 + .. c:member:: int user_site_directory If non-zero, add the user site directory to :data:`sys.path`. diff --git a/iOS/README.rst b/iOS/README.rst index 9cea98cf1abbfa..13b885144932e4 100644 --- a/iOS/README.rst +++ b/iOS/README.rst @@ -286,7 +286,7 @@ This will: * Run the test suite on an "iPhone SE (3rd generation)" simulator. On success, the test suite will exit and report successful completion of the -test suite. On a 2022 M1 MacBook Pro, the test suite takes approximately 12 +test suite. On a 2022 M1 MacBook Pro, the test suite takes approximately 15 minutes to run; a couple of extra minutes is required to compile the testbed project, and then boot and prepare the iOS simulator. From 12b4f1a5a175d4dcec27631fce2883038f0917ae Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 11 Dec 2024 00:09:55 +0000 Subject: [PATCH 10/46] GH-127381: pathlib ABCs: remove `PathBase.samefile()` and rarer `is_*()` (#127709) Remove `PathBase.samefile()`, which is fairly specific to the local FS, and relies on `stat()`, which we're aiming to remove from `PathBase`. Also remove `PathBase.is_mount()`, `is_junction()`, `is_block_device()`, `is_char_device()`, `is_fifo()` and `is_socket()`. These rely on POSIX file type numbers that we're aiming to remove from the `PathBase` API. --- Lib/pathlib/_abc.py | 89 +---------------------- Lib/pathlib/_local.py | 65 ++++++++++++++++- Lib/test/test_pathlib/test_pathlib.py | 77 +++++++++++++++++++- Lib/test/test_pathlib/test_pathlib_abc.py | 81 --------------------- 4 files changed, 141 insertions(+), 171 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f68685f21d6d79..02c6e0500617aa 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -16,7 +16,7 @@ import posixpath from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks -from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +from stat import S_ISDIR, S_ISLNK, S_ISREG from pathlib._os import copyfileobj @@ -408,26 +408,6 @@ def is_file(self, *, follow_symlinks=True): except (OSError, ValueError): return False - def is_mount(self): - """ - Check if this path is a mount point - """ - # Need to exist and be a dir - if not self.exists() or not self.is_dir(): - return False - - try: - parent_dev = self.parent.stat().st_dev - except OSError: - return False - - dev = self.stat().st_dev - if dev != parent_dev: - return True - ino = self.stat().st_ino - parent_ino = self.parent.stat().st_ino - return ino == parent_ino - def is_symlink(self): """ Whether this path is a symbolic link. @@ -437,76 +417,11 @@ def is_symlink(self): except (OSError, ValueError): return False - def is_junction(self): - """ - Whether this path is a junction. 
- """ - # Junctions are a Windows-only feature, not present in POSIX nor the - # majority of virtual filesystems. There is no cross-platform idiom - # to check for junctions (using stat().st_mode). - return False - - def is_block_device(self): - """ - Whether this path is a block device. - """ - try: - return S_ISBLK(self.stat().st_mode) - except (OSError, ValueError): - return False - - def is_char_device(self): - """ - Whether this path is a character device. - """ - try: - return S_ISCHR(self.stat().st_mode) - except (OSError, ValueError): - return False - - def is_fifo(self): - """ - Whether this path is a FIFO. - """ - try: - return S_ISFIFO(self.stat().st_mode) - except (OSError, ValueError): - return False - - def is_socket(self): - """ - Whether this path is a socket. - """ - try: - return S_ISSOCK(self.stat().st_mode) - except (OSError, ValueError): - return False - - def samefile(self, other_path): - """Return whether other_path is the same or not as this file - (as returned by os.path.samefile()). - """ - st = self.stat() - try: - other_st = other_path.stat() - except AttributeError: - other_st = self.with_segments(other_path).stat() - return (st.st_ino == other_st.st_ino and - st.st_dev == other_st.st_dev) - def _ensure_different_file(self, other_path): """ Raise OSError(EINVAL) if both paths refer to the same file. """ - try: - if not self.samefile(other_path): - return - except (OSError, ValueError): - return - err = OSError(EINVAL, "Source and target are the same file") - err.filename = str(self) - err.filename2 = str(other_path) - raise err + pass def _ensure_distinct_path(self, other_path): """ diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index f87069ce70a2de..85437ec80bfcc4 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -4,9 +4,10 @@ import os import posixpath import sys -from errno import EXDEV +from errno import EINVAL, EXDEV from glob import _StringGlobber from itertools import chain +from stat import S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from _collections_abc import Sequence try: @@ -596,6 +597,68 @@ def is_junction(self): """ return os.path.isjunction(self) + def is_block_device(self): + """ + Whether this path is a block device. + """ + try: + return S_ISBLK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_char_device(self): + """ + Whether this path is a character device. + """ + try: + return S_ISCHR(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_fifo(self): + """ + Whether this path is a FIFO. + """ + try: + return S_ISFIFO(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_socket(self): + """ + Whether this path is a socket. + """ + try: + return S_ISSOCK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def samefile(self, other_path): + """Return whether other_path is the same or not as this file + (as returned by os.path.samefile()). + """ + st = self.stat() + try: + other_st = other_path.stat() + except AttributeError: + other_st = self.with_segments(other_path).stat() + return (st.st_ino == other_st.st_ino and + st.st_dev == other_st.st_dev) + + def _ensure_different_file(self, other_path): + """ + Raise OSError(EINVAL) if both paths refer to the same file. 
+ """ + try: + if not self.samefile(other_path): + return + except (OSError, ValueError): + return + err = OSError(EINVAL, "Source and target are the same file") + err.filename = str(self) + err.filename2 = str(other_path) + raise err + def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index ce0f4748c860b1..b57ef420bfcbcd 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1786,13 +1786,31 @@ def test_lstat_nosymlink(self): st = p.stat() self.assertEqual(st, p.lstat()) - def test_is_junction(self): + def test_is_junction_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_junction()) + self.assertFalse((P / 'dirA').is_junction()) + self.assertFalse((P / 'non-existing').is_junction()) + self.assertFalse((P / 'fileA' / 'bah').is_junction()) + self.assertFalse((P / 'fileA\udfff').is_junction()) + self.assertFalse((P / 'fileA\x00').is_junction()) + + def test_is_junction_true(self): P = self.cls(self.base) with mock.patch.object(P.parser, 'isjunction'): self.assertEqual(P.is_junction(), P.parser.isjunction.return_value) P.parser.isjunction.assert_called_once_with(P) + def test_is_fifo_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_fifo()) + self.assertFalse((P / 'dirA').is_fifo()) + self.assertFalse((P / 'non-existing').is_fifo()) + self.assertFalse((P / 'fileA' / 'bah').is_fifo()) + self.assertIs((P / 'fileA\udfff').is_fifo(), False) + self.assertIs((P / 'fileA\x00').is_fifo(), False) + @unittest.skipUnless(hasattr(os, "mkfifo"), "os.mkfifo() required") @unittest.skipIf(sys.platform == "vxworks", "fifo requires special path on VxWorks") @@ -1808,6 +1826,15 @@ def test_is_fifo_true(self): self.assertIs(self.cls(self.base, 'myfifo\udfff').is_fifo(), False) self.assertIs(self.cls(self.base, 'myfifo\x00').is_fifo(), False) + def test_is_socket_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_socket()) + self.assertFalse((P / 'dirA').is_socket()) + self.assertFalse((P / 'non-existing').is_socket()) + self.assertFalse((P / 'fileA' / 'bah').is_socket()) + self.assertIs((P / 'fileA\udfff').is_socket(), False) + self.assertIs((P / 'fileA\x00').is_socket(), False) + @unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required") @unittest.skipIf( is_emscripten, "Unix sockets are not implemented on Emscripten." 
@@ -1831,6 +1858,24 @@ def test_is_socket_true(self): self.assertIs(self.cls(self.base, 'mysock\udfff').is_socket(), False) self.assertIs(self.cls(self.base, 'mysock\x00').is_socket(), False) + def test_is_block_device_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_block_device()) + self.assertFalse((P / 'dirA').is_block_device()) + self.assertFalse((P / 'non-existing').is_block_device()) + self.assertFalse((P / 'fileA' / 'bah').is_block_device()) + self.assertIs((P / 'fileA\udfff').is_block_device(), False) + self.assertIs((P / 'fileA\x00').is_block_device(), False) + + def test_is_char_device_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_char_device()) + self.assertFalse((P / 'dirA').is_char_device()) + self.assertFalse((P / 'non-existing').is_char_device()) + self.assertFalse((P / 'fileA' / 'bah').is_char_device()) + self.assertIs((P / 'fileA\udfff').is_char_device(), False) + self.assertIs((P / 'fileA\x00').is_char_device(), False) + def test_is_char_device_true(self): # os.devnull should generally be a char device. P = self.cls(os.devnull) @@ -1842,7 +1887,14 @@ def test_is_char_device_true(self): self.assertIs(self.cls(f'{os.devnull}\udfff').is_char_device(), False) self.assertIs(self.cls(f'{os.devnull}\x00').is_char_device(), False) - def test_is_mount_root(self): + def test_is_mount(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_mount()) + self.assertFalse((P / 'dirA').is_mount()) + self.assertFalse((P / 'non-existing').is_mount()) + self.assertFalse((P / 'fileA' / 'bah').is_mount()) + if self.can_symlink: + self.assertFalse((P / 'linkA').is_mount()) if os.name == 'nt': R = self.cls('c:\\') else: @@ -1850,6 +1902,27 @@ def test_is_mount_root(self): self.assertTrue(R.is_mount()) self.assertFalse((R / '\udfff').is_mount()) + def test_samefile(self): + parser = self.parser + fileA_path = parser.join(self.base, 'fileA') + fileB_path = parser.join(self.base, 'dirB', 'fileB') + p = self.cls(fileA_path) + pp = self.cls(fileA_path) + q = self.cls(fileB_path) + self.assertTrue(p.samefile(fileA_path)) + self.assertTrue(p.samefile(pp)) + self.assertFalse(p.samefile(fileB_path)) + self.assertFalse(p.samefile(q)) + # Test the non-existent file case + non_existent = parser.join(self.base, 'foo') + r = self.cls(non_existent) + self.assertRaises(FileNotFoundError, p.samefile, r) + self.assertRaises(FileNotFoundError, p.samefile, non_existent) + self.assertRaises(FileNotFoundError, r.samefile, p) + self.assertRaises(FileNotFoundError, r.samefile, non_existent) + self.assertRaises(FileNotFoundError, r.samefile, r) + self.assertRaises(FileNotFoundError, r.samefile, non_existent) + def test_passing_kwargs_errors(self): with self.assertRaises(TypeError): self.cls(foo="bar") diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index dd9425ce393623..f4c364c6fe5109 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1300,15 +1300,9 @@ def test_unsupported_operation(self): e = UnsupportedOperation self.assertRaises(e, p.stat) self.assertRaises(e, p.exists) - self.assertRaises(e, p.samefile, 'foo') self.assertRaises(e, p.is_dir) self.assertRaises(e, p.is_file) - self.assertRaises(e, p.is_mount) self.assertRaises(e, p.is_symlink) - self.assertRaises(e, p.is_block_device) - self.assertRaises(e, p.is_char_device) - self.assertRaises(e, p.is_fifo) - self.assertRaises(e, p.is_socket) self.assertRaises(e, p.open) self.assertRaises(e, 
p.read_bytes) self.assertRaises(e, p.read_text) @@ -1535,27 +1529,6 @@ def assertEqualNormCase(self, path_a, path_b): normcase = self.parser.normcase self.assertEqual(normcase(path_a), normcase(path_b)) - def test_samefile(self): - parser = self.parser - fileA_path = parser.join(self.base, 'fileA') - fileB_path = parser.join(self.base, 'dirB', 'fileB') - p = self.cls(fileA_path) - pp = self.cls(fileA_path) - q = self.cls(fileB_path) - self.assertTrue(p.samefile(fileA_path)) - self.assertTrue(p.samefile(pp)) - self.assertFalse(p.samefile(fileB_path)) - self.assertFalse(p.samefile(q)) - # Test the non-existent file case - non_existent = parser.join(self.base, 'foo') - r = self.cls(non_existent) - self.assertRaises(FileNotFoundError, p.samefile, r) - self.assertRaises(FileNotFoundError, p.samefile, non_existent) - self.assertRaises(FileNotFoundError, r.samefile, p) - self.assertRaises(FileNotFoundError, r.samefile, non_existent) - self.assertRaises(FileNotFoundError, r.samefile, r) - self.assertRaises(FileNotFoundError, r.samefile, non_existent) - def test_exists(self): P = self.cls p = P(self.base) @@ -2115,15 +2088,6 @@ def test_is_file_no_follow_symlinks(self): self.assertFalse((P / 'fileA\udfff').is_file(follow_symlinks=False)) self.assertFalse((P / 'fileA\x00').is_file(follow_symlinks=False)) - def test_is_mount(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_mount()) - self.assertFalse((P / 'dirA').is_mount()) - self.assertFalse((P / 'non-existing').is_mount()) - self.assertFalse((P / 'fileA' / 'bah').is_mount()) - if self.can_symlink: - self.assertFalse((P / 'linkA').is_mount()) - def test_is_symlink(self): P = self.cls(self.base) self.assertFalse((P / 'fileA').is_symlink()) @@ -2140,51 +2104,6 @@ def test_is_symlink(self): self.assertIs((P / 'linkA\udfff').is_file(), False) self.assertIs((P / 'linkA\x00').is_file(), False) - def test_is_junction_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_junction()) - self.assertFalse((P / 'dirA').is_junction()) - self.assertFalse((P / 'non-existing').is_junction()) - self.assertFalse((P / 'fileA' / 'bah').is_junction()) - self.assertFalse((P / 'fileA\udfff').is_junction()) - self.assertFalse((P / 'fileA\x00').is_junction()) - - def test_is_fifo_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_fifo()) - self.assertFalse((P / 'dirA').is_fifo()) - self.assertFalse((P / 'non-existing').is_fifo()) - self.assertFalse((P / 'fileA' / 'bah').is_fifo()) - self.assertIs((P / 'fileA\udfff').is_fifo(), False) - self.assertIs((P / 'fileA\x00').is_fifo(), False) - - def test_is_socket_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_socket()) - self.assertFalse((P / 'dirA').is_socket()) - self.assertFalse((P / 'non-existing').is_socket()) - self.assertFalse((P / 'fileA' / 'bah').is_socket()) - self.assertIs((P / 'fileA\udfff').is_socket(), False) - self.assertIs((P / 'fileA\x00').is_socket(), False) - - def test_is_block_device_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_block_device()) - self.assertFalse((P / 'dirA').is_block_device()) - self.assertFalse((P / 'non-existing').is_block_device()) - self.assertFalse((P / 'fileA' / 'bah').is_block_device()) - self.assertIs((P / 'fileA\udfff').is_block_device(), False) - self.assertIs((P / 'fileA\x00').is_block_device(), False) - - def test_is_char_device_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_char_device()) - self.assertFalse((P / 
'dirA').is_char_device()) - self.assertFalse((P / 'non-existing').is_char_device()) - self.assertFalse((P / 'fileA' / 'bah').is_char_device()) - self.assertIs((P / 'fileA\udfff').is_char_device(), False) - self.assertIs((P / 'fileA\x00').is_char_device(), False) - def test_delete_file(self): p = self.cls(self.base) / 'fileA' p._delete() From db9bea0386c1c0b6c9d7c66474cda7e47e4b56f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Srinivas=20Reddy=20Thatiparthy=20=28=E0=B0=A4=E0=B0=BE?= =?UTF-8?q?=E0=B0=9F=E0=B0=BF=E0=B0=AA=E0=B0=B0=E0=B1=8D=E0=B0=A4=E0=B0=BF?= =?UTF-8?q?=20=E0=B0=B6=E0=B1=8D=E0=B0=B0=E0=B1=80=E0=B0=A8=E0=B0=BF?= =?UTF-8?q?=E0=B0=B5=E0=B0=BE=E0=B0=B8=E0=B1=8D=20=20=E0=B0=B0=E0=B1=86?= =?UTF-8?q?=E0=B0=A1=E0=B1=8D=E0=B0=A1=E0=B0=BF=29?= Date: Wed, 11 Dec 2024 13:05:17 +0530 Subject: [PATCH 11/46] gh-127740: For odd-length input to bytes.fromhex(...) change the error message to ValueError: fromhex() arg must be of even length (#127756) --- Lib/test/test_bytes.py | 6 ++++++ ...4-12-10-21-08-05.gh-issue-127740.0tWC9h.rst | 3 +++ Objects/bytesobject.c | 18 ++++++++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 9e1985bb3a7639..32cd178fa3b445 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -459,6 +459,12 @@ def test_fromhex(self): self.assertRaises(ValueError, self.type2test.fromhex, '\x00') self.assertRaises(ValueError, self.type2test.fromhex, '12 \x00 34') + # For odd number of character(s) + for value in ("a", "aaa", "deadbee"): + with self.assertRaises(ValueError) as cm: + self.type2test.fromhex(value) + self.assertIn("fromhex() arg must contain an even number of hexadecimal digits", str(cm.exception)) + for data, pos in ( # invalid first hexadecimal character ('12 x4 56', 3), diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst new file mode 100644 index 00000000000000..f614dbb59bdc87 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst @@ -0,0 +1,3 @@ +Fix error message in :func:`bytes.fromhex` when given an odd number of +digits to properly indicate that an even number of hexadecimal digits is +required. 
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 8c7651f0f3aa45..533089d25cd73a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2543,7 +2543,12 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) bot = _PyLong_DigitValue[*str]; if (bot >= 16) { - invalid_char = str - PyUnicode_1BYTE_DATA(string); + /* Check if we had a second digit */ + if (str >= end){ + invalid_char = -1; + } else { + invalid_char = str - PyUnicode_1BYTE_DATA(string); + } goto error; } str++; @@ -2554,9 +2559,14 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) return _PyBytesWriter_Finish(&writer, buf); error: - PyErr_Format(PyExc_ValueError, - "non-hexadecimal number found in " - "fromhex() arg at position %zd", invalid_char); + if (invalid_char == -1) { + PyErr_SetString(PyExc_ValueError, + "fromhex() arg must contain an even number of hexadecimal digits"); + } else { + PyErr_Format(PyExc_ValueError, + "non-hexadecimal number found in " + "fromhex() arg at position %zd", invalid_char); + } _PyBytesWriter_Dealloc(&writer); return NULL; } From 2cdeb61b57e638ae46a04386330a12abe9cddf2c Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 11 Dec 2024 04:27:07 -0500 Subject: [PATCH 12/46] Add `extern "C"` around `PyTraceMalloc_` functions. (#127772) Pretty much everything else exported by Python.h has an extern "C" annotation, yet this header appears to be missing one. --- Include/cpython/tracemalloc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Include/cpython/tracemalloc.h b/Include/cpython/tracemalloc.h index 61a16ea9a9f3eb..6d094291ae2e90 100644 --- a/Include/cpython/tracemalloc.h +++ b/Include/cpython/tracemalloc.h @@ -1,6 +1,9 @@ #ifndef Py_LIMITED_API #ifndef Py_TRACEMALLOC_H #define Py_TRACEMALLOC_H +#ifdef __cplusplus +extern "C" { +#endif /* Track an allocated memory block in the tracemalloc module. Return 0 on success, return -1 on error (failed to allocate memory to store @@ -22,5 +25,8 @@ PyAPI_FUNC(int) PyTraceMalloc_Untrack( unsigned int domain, uintptr_t ptr); +#ifdef __cplusplus +} +#endif #endif // !Py_TRACEMALLOC_H #endif // !Py_LIMITED_API From d5d84c3f13fe7fe591b375c41979d362bc11957a Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 11 Dec 2024 06:14:04 -0500 Subject: [PATCH 13/46] gh-127791: Fix, document, and test `PyUnstable_AtExit` (#127793) --- Doc/c-api/init.rst | 9 ++++ Doc/c-api/sys.rst | 4 ++ Include/internal/pycore_atexit.h | 1 - ...-12-10-14-25-22.gh-issue-127791.YRw4GU.rst | 2 + Modules/_testcapimodule.c | 48 +++++++++++++++++++ Modules/_testinternalcapi.c | 34 ------------- Modules/atexitmodule.c | 12 +++-- 7 files changed, 71 insertions(+), 39 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index ba1c2852f0bd53..dd63dd013e32dc 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -567,6 +567,15 @@ Initializing and finalizing the interpreter customized Python that always runs in isolated mode using :c:func:`Py_RunMain`. +.. c:function:: int PyUnstable_AtExit(PyInterpreterState *interp, void (*func)(void *), void *data) + + Register an :mod:`atexit` callback for the target interpreter *interp*. + This is similar to :c:func:`Py_AtExit`, but takes an explicit interpreter and + data pointer for the callback. + + The :term:`GIL` must be held for *interp*. + + .. 
versionadded:: 3.13 Process-wide parameters ======================= diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index d6fca1a0b0a219..c688afdca8231d 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -426,3 +426,7 @@ Process Control function registered last is called first. Each cleanup function will be called at most once. Since Python's internal finalization will have completed before the cleanup function, no Python APIs should be called by *func*. + + .. seealso:: + + :c:func:`PyUnstable_AtExit` for passing a ``void *data`` argument. diff --git a/Include/internal/pycore_atexit.h b/Include/internal/pycore_atexit.h index 507a5c03cbc792..cde5b530baf00c 100644 --- a/Include/internal/pycore_atexit.h +++ b/Include/internal/pycore_atexit.h @@ -44,7 +44,6 @@ typedef struct { struct atexit_state { atexit_callback *ll_callbacks; - atexit_callback *last_ll_callback; // XXX The rest of the state could be moved to the atexit module state // and a low-level callback added for it during module exec. diff --git a/Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst b/Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst new file mode 100644 index 00000000000000..70751f18f5cf17 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst @@ -0,0 +1,2 @@ +Fix loss of callbacks after more than one call to +:c:func:`PyUnstable_AtExit`. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 26f68691e44f83..8d86b535effb9a 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3353,6 +3353,53 @@ type_freeze(PyObject *module, PyObject *args) Py_RETURN_NONE; } +struct atexit_data { + int called; + PyThreadState *tstate; + PyInterpreterState *interp; +}; + +static void +atexit_callback(void *data) +{ + struct atexit_data *at_data = (struct atexit_data *)data; + // Ensure that the callback is from the same interpreter + assert(PyThreadState_Get() == at_data->tstate); + assert(PyInterpreterState_Get() == at_data->interp); + ++at_data->called; +} + +static PyObject * +test_atexit(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyThreadState *oldts = PyThreadState_Swap(NULL); + PyThreadState *tstate = Py_NewInterpreter(); + + struct atexit_data data = {0}; + data.tstate = PyThreadState_Get(); + data.interp = PyInterpreterState_Get(); + + int amount = 10; + for (int i = 0; i < amount; ++i) + { + int res = PyUnstable_AtExit(tstate->interp, atexit_callback, (void *)&data); + if (res < 0) { + Py_EndInterpreter(tstate); + PyThreadState_Swap(oldts); + PyErr_SetString(PyExc_RuntimeError, "atexit callback failed"); + return NULL; + } + } + + Py_EndInterpreter(tstate); + PyThreadState_Swap(oldts); + + if (data.called != amount) { + PyErr_SetString(PyExc_RuntimeError, "atexit callback not called"); + return NULL; + } + Py_RETURN_NONE; +} static PyMethodDef TestMethods[] = { {"set_errno", set_errno, METH_VARARGS}, @@ -3495,6 +3542,7 @@ static PyMethodDef TestMethods[] = { {"test_critical_sections", test_critical_sections, METH_NOARGS}, {"finalize_thread_hang", finalize_thread_hang, METH_O, NULL}, {"type_freeze", type_freeze, METH_VARARGS}, + {"test_atexit", test_atexit, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 1bb71a3e80b39d..288daf09a5fe5c 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1236,39 +1236,6 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) return 
_PyUnicode_TransformDecimalAndSpaceToASCII(arg); } - -struct atexit_data { - int called; -}; - -static void -callback(void *data) -{ - ((struct atexit_data *)data)->called += 1; -} - -static PyObject * -test_atexit(PyObject *self, PyObject *Py_UNUSED(args)) -{ - PyThreadState *oldts = PyThreadState_Swap(NULL); - PyThreadState *tstate = Py_NewInterpreter(); - - struct atexit_data data = {0}; - int res = PyUnstable_AtExit(tstate->interp, callback, (void *)&data); - Py_EndInterpreter(tstate); - PyThreadState_Swap(oldts); - if (res < 0) { - return NULL; - } - - if (data.called == 0) { - PyErr_SetString(PyExc_RuntimeError, "atexit callback not called"); - return NULL; - } - Py_RETURN_NONE; -} - - static PyObject * test_pyobject_is_freed(const char *test_name, PyObject *op) { @@ -2128,7 +2095,6 @@ static PyMethodDef module_functions[] = { {"_PyTraceMalloc_GetTraceback", tracemalloc_get_traceback, METH_VARARGS}, {"test_tstate_capi", test_tstate_capi, METH_NOARGS, NULL}, {"_PyUnicode_TransformDecimalAndSpaceToASCII", unicode_transformdecimalandspacetoascii, METH_O}, - {"test_atexit", test_atexit, METH_NOARGS}, {"check_pyobject_forbidden_bytes_is_freed", check_pyobject_forbidden_bytes_is_freed, METH_NOARGS}, {"check_pyobject_freed_is_freed", check_pyobject_freed_is_freed, METH_NOARGS}, diff --git a/Modules/atexitmodule.c b/Modules/atexitmodule.c index 297a8d74ba3bf4..c009235b7a36c2 100644 --- a/Modules/atexitmodule.c +++ b/Modules/atexitmodule.c @@ -27,7 +27,10 @@ int PyUnstable_AtExit(PyInterpreterState *interp, atexit_datacallbackfunc func, void *data) { - assert(interp == _PyInterpreterState_GET()); + PyThreadState *tstate = _PyThreadState_GET(); + _Py_EnsureTstateNotNULL(tstate); + assert(tstate->interp == interp); + atexit_callback *callback = PyMem_Malloc(sizeof(atexit_callback)); if (callback == NULL) { PyErr_NoMemory(); @@ -38,12 +41,13 @@ PyUnstable_AtExit(PyInterpreterState *interp, callback->next = NULL; struct atexit_state *state = &interp->atexit; - if (state->ll_callbacks == NULL) { + atexit_callback *top = state->ll_callbacks; + if (top == NULL) { state->ll_callbacks = callback; - state->last_ll_callback = callback; } else { - state->last_ll_callback->next = callback; + callback->next = top; + state->ll_callbacks = callback; } return 0; } From ce76b547f94de6b1c9c74657b4e8f150365ad76f Mon Sep 17 00:00:00 2001 From: Justin Applegate <70449145+Legoclones@users.noreply.github.com> Date: Wed, 11 Dec 2024 05:37:59 -0700 Subject: [PATCH 14/46] gh-126992: Change pickle code to base 10 for load_long and load_int (GH-127042) Co-authored-by: Serhiy Storchaka --- Lib/pickle.py | 4 +- Lib/test/pickletester.py | 20 ++++++++++ Lib/test/test_pickletools.py | 37 +++++++++++++++++++ ...-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst | 1 + Modules/_pickle.c | 11 ++---- 5 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst diff --git a/Lib/pickle.py b/Lib/pickle.py index 25dadb3f75a573..1920973e3f83e9 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1387,7 +1387,7 @@ def load_int(self): elif data == TRUE[1:]: val = True else: - val = int(data, 0) + val = int(data) self.append(val) dispatch[INT[0]] = load_int @@ -1407,7 +1407,7 @@ def load_long(self): val = self.readline()[:-1] if val and val[-1] == b'L'[0]: val = val[:-1] - self.append(int(val, 0)) + self.append(int(val)) dispatch[LONG[0]] = load_long def load_long1(self): diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 
cf020a48b81cfa..bdc7ef62943a28 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1012,6 +1012,26 @@ def test_constants(self): self.assertIs(self.loads(b'I01\n.'), True) self.assertIs(self.loads(b'I00\n.'), False) + def test_zero_padded_integers(self): + self.assertEqual(self.loads(b'I010\n.'), 10) + self.assertEqual(self.loads(b'I-010\n.'), -10) + self.assertEqual(self.loads(b'I0010\n.'), 10) + self.assertEqual(self.loads(b'I-0010\n.'), -10) + self.assertEqual(self.loads(b'L010\n.'), 10) + self.assertEqual(self.loads(b'L-010\n.'), -10) + self.assertEqual(self.loads(b'L0010\n.'), 10) + self.assertEqual(self.loads(b'L-0010\n.'), -10) + self.assertEqual(self.loads(b'L010L\n.'), 10) + self.assertEqual(self.loads(b'L-010L\n.'), -10) + + def test_nondecimal_integers(self): + self.assertRaises(ValueError, self.loads, b'I0b10\n.') + self.assertRaises(ValueError, self.loads, b'I0o10\n.') + self.assertRaises(ValueError, self.loads, b'I0x10\n.') + self.assertRaises(ValueError, self.loads, b'L0b10L\n.') + self.assertRaises(ValueError, self.loads, b'L0o10L\n.') + self.assertRaises(ValueError, self.loads, b'L0x10L\n.') + def test_empty_bytestring(self): # issue 11286 empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r') diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 265dc497ccb86c..a178d3353eecdf 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -443,6 +443,43 @@ def test_persid(self): highest protocol among opcodes = 0 ''') + def test_constants(self): + self.check_dis(b"(NI00\nI01\n\x89\x88t.", '''\ + 0: ( MARK + 1: N NONE + 2: I INT False + 6: I INT True + 10: \\x89 NEWFALSE + 11: \\x88 NEWTRUE + 12: t TUPLE (MARK at 0) + 13: . STOP +highest protocol among opcodes = 2 +''') + + def test_integers(self): + self.check_dis(b"(I0\nI1\nI10\nI011\nL12\nL13L\nL014\nL015L\nt.", '''\ + 0: ( MARK + 1: I INT 0 + 4: I INT 1 + 7: I INT 10 + 11: I INT 11 + 16: L LONG 12 + 20: L LONG 13 + 25: L LONG 14 + 30: L LONG 15 + 36: t TUPLE (MARK at 0) + 37: . STOP +highest protocol among opcodes = 0 +''') + + def test_nondecimal_integers(self): + self.check_dis_error(b'I0b10\n.', '', 'invalid literal for int') + self.check_dis_error(b'I0o10\n.', '', 'invalid literal for int') + self.check_dis_error(b'I0x10\n.', '', 'invalid literal for int') + self.check_dis_error(b'L0b10L\n.', '', 'invalid literal for int') + self.check_dis_error(b'L0o10L\n.', '', 'invalid literal for int') + self.check_dis_error(b'L0x10L\n.', '', 'invalid literal for int') + class MiscTestCase(unittest.TestCase): def test__all__(self): diff --git a/Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst b/Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst new file mode 100644 index 00000000000000..526785f68cc807 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst @@ -0,0 +1 @@ +Fix LONG and INT opcodes to only use base 10 for string to integer conversion in :mod:`pickle`. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 2696f38046121f..599b5f92c2a1f7 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -5211,16 +5211,14 @@ load_int(PickleState *state, UnpicklerObject *self) return bad_readline(state); errno = 0; - /* XXX: Should the base argument of strtol() be explicitly set to 10? - XXX(avassalotti): Should this uses PyOS_strtol()? */ - x = strtol(s, &endptr, 0); + /* XXX(avassalotti): Should this uses PyOS_strtol()? 
*/ + x = strtol(s, &endptr, 10); if (errno || (*endptr != '\n' && *endptr != '\0')) { /* Hm, maybe we've got something long. Let's try reading * it as a Python int object. */ errno = 0; - /* XXX: Same thing about the base here. */ - value = PyLong_FromString(s, NULL, 0); + value = PyLong_FromString(s, NULL, 10); if (value == NULL) { PyErr_SetString(PyExc_ValueError, "could not convert string to int"); @@ -5370,8 +5368,7 @@ load_long(PickleState *state, UnpicklerObject *self) the 'L' to be present. */ if (s[len-2] == 'L') s[len-2] = '\0'; - /* XXX: Should the base argument explicitly set to 10? */ - value = PyLong_FromString(s, NULL, 0); + value = PyLong_FromString(s, NULL, 10); if (value == NULL) return -1; From b2ad7e0a2c1518539d9b0ef83c9f7a09d10fd303 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:57:51 +0200 Subject: [PATCH 15/46] CI: Use bash to properly expand variable (#127822) --- .github/workflows/reusable-windows-msi.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index d0d53dba0b45d1..a1c45d954247fb 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -24,4 +24,5 @@ jobs: with: persist-credentials: false - name: Build CPython installer - run: .\Tools\msi\build.bat --doc -"${ARCH}" + run: ./Tools/msi/build.bat --doc -"${ARCH}" + shell: bash From 359389ed51aecc107681e600b71852c0a97304e1 Mon Sep 17 00:00:00 2001 From: Nano Date: Wed, 11 Dec 2024 21:28:19 +0800 Subject: [PATCH 16/46] gh-123401: Fix http.cookies module to support obsolete RFC 850 date format (#123405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Wulian <1055917385@qq.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner --- Lib/http/cookies.py | 6 ++- Lib/test/test_http_cookies.py | 46 +++++++++++++++++++ ...-08-27-18-58-01.gh-issue-123401.t4-FpI.rst | 3 ++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index d7e8d08b2d92c1..23d5461f86fc23 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -425,9 +425,11 @@ def OutputString(self, attrs=None): ( # Optional group: there may not be a value. 
\s*=\s* # Equal Sign (?P # Start of group 'val' - "(?:[^\\"]|\\.)*" # Any doublequoted string + "(?:[^\\"]|\\.)*" # Any double-quoted string | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + # Special case for "expires" attr + (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day + [\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Date and time in specific format | # or [""" + _LegalValueChars + r"""]* # Any word or empty string ) # End of group 'val' diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index 8879902a6e2f41..7b3dc0fdaedc3b 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -59,6 +59,52 @@ def test_basic(self): for k, v in sorted(case['dict'].items()): self.assertEqual(C[k].value, v) + def test_obsolete_rfc850_date_format(self): + # Test cases with different days and dates in obsolete RFC 850 format + test_cases = [ + # from RFC 850, change EST to GMT + # https://datatracker.ietf.org/doc/html/rfc850#section-2 + { + 'data': 'key=value; expires=Saturday, 01-Jan-83 00:00:00 GMT', + 'output': 'Saturday, 01-Jan-83 00:00:00 GMT' + }, + { + 'data': 'key=value; expires=Friday, 19-Nov-82 16:59:30 GMT', + 'output': 'Friday, 19-Nov-82 16:59:30 GMT' + }, + # from RFC 9110 + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.6.7-6 + { + 'data': 'key=value; expires=Sunday, 06-Nov-94 08:49:37 GMT', + 'output': 'Sunday, 06-Nov-94 08:49:37 GMT' + }, + # other test cases + { + 'data': 'key=value; expires=Wednesday, 09-Nov-94 08:49:37 GMT', + 'output': 'Wednesday, 09-Nov-94 08:49:37 GMT' + }, + { + 'data': 'key=value; expires=Friday, 11-Nov-94 08:49:37 GMT', + 'output': 'Friday, 11-Nov-94 08:49:37 GMT' + }, + { + 'data': 'key=value; expires=Monday, 14-Nov-94 08:49:37 GMT', + 'output': 'Monday, 14-Nov-94 08:49:37 GMT' + }, + ] + + for case in test_cases: + with self.subTest(data=case['data']): + C = cookies.SimpleCookie() + C.load(case['data']) + + # Extract the cookie name from the data string + cookie_name = case['data'].split('=')[0] + + # Check if the cookie is loaded correctly + self.assertIn(cookie_name, C) + self.assertEqual(C[cookie_name].get('expires'), case['output']) + def test_unquote(self): cases = [ (r'a="b=\""', 'b="'), diff --git a/Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst b/Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst new file mode 100644 index 00000000000000..638f3f76239ca6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst @@ -0,0 +1,3 @@ +The :mod:`http.cookies` module now supports parsing obsolete :rfc:`850` +date formats, in accordance with :rfc:`9110` requirements. +Patch by Nano Zheng. From 5a23994a3dbee43a0b08f5920032f60f38b63071 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 11 Dec 2024 14:02:59 +0000 Subject: [PATCH 17/46] GH-127058: Make `PySequence_Tuple` safer and probably faster. (#127758) * Use a small buffer, then list when constructing a tuple from an arbitrary sequence. 
--- Include/internal/pycore_list.h | 2 +- Lib/test/test_capi/test_tuple.py | 25 ++++++ ...-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst | 3 + Objects/abstract.c | 87 +++++++++---------- Objects/listobject.c | 19 ++++ 5 files changed, 88 insertions(+), 48 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index f03e484f5ef8b0..836ff30abfcedb 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -62,7 +62,7 @@ typedef struct { union _PyStackRef; PyAPI_FUNC(PyObject *)_PyList_FromStackRefSteal(const union _PyStackRef *src, Py_ssize_t n); - +PyAPI_FUNC(PyObject *)_PyList_AsTupleAndClear(PyListObject *v); #ifdef __cplusplus } diff --git a/Lib/test/test_capi/test_tuple.py b/Lib/test/test_capi/test_tuple.py index e6b49caeb51f32..6349467c5d6b70 100644 --- a/Lib/test/test_capi/test_tuple.py +++ b/Lib/test/test_capi/test_tuple.py @@ -1,5 +1,6 @@ import unittest import sys +import gc from collections import namedtuple from test.support import import_helper @@ -257,5 +258,29 @@ def test__tuple_resize(self): self.assertRaises(SystemError, resize, [1, 2, 3], 0, False) self.assertRaises(SystemError, resize, NULL, 0, False) + def test_bug_59313(self): + # Before 3.14, the C-API function PySequence_Tuple + # would create incomplete tuples which were visible to + # the cycle GC, and this test would crash the interpeter. + TAG = object() + tuples = [] + + def referrer_tuples(): + return [x for x in gc.get_referrers(TAG) + if isinstance(x, tuple)] + + def my_iter(): + nonlocal tuples + yield TAG # 'tag' gets stored in the result tuple + tuples += referrer_tuples() + for x in range(10): + tuples += referrer_tuples() + # Prior to 3.13 would raise a SystemError when the tuple needs to be resized + yield x + + self.assertEqual(tuple(my_iter()), (TAG, *range(10))) + self.assertEqual(tuples, []) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst new file mode 100644 index 00000000000000..248e1b4855afb8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst @@ -0,0 +1,3 @@ +``PySequence_Tuple`` now creates the resulting tuple atomically, preventing +partially created tuples being visible to the garbage collector or through +``gc.get_referrers()`` diff --git a/Objects/abstract.c b/Objects/abstract.c index f6647874d732f6..c92ef10aa79648 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1993,9 +1993,6 @@ PyObject * PySequence_Tuple(PyObject *v) { PyObject *it; /* iter(v) */ - Py_ssize_t n; /* guess for result tuple size */ - PyObject *result = NULL; - Py_ssize_t j; if (v == NULL) { return null_error(); @@ -2017,58 +2014,54 @@ PySequence_Tuple(PyObject *v) if (it == NULL) return NULL; - /* Guess result size and allocate space. */ - n = PyObject_LengthHint(v, 10); - if (n == -1) - goto Fail; - result = PyTuple_New(n); - if (result == NULL) - goto Fail; - - /* Fill the tuple. 
*/ - for (j = 0; ; ++j) { + Py_ssize_t n; + PyObject *buffer[8]; + for (n = 0; n < 8; n++) { PyObject *item = PyIter_Next(it); if (item == NULL) { - if (PyErr_Occurred()) - goto Fail; - break; - } - if (j >= n) { - size_t newn = (size_t)n; - /* The over-allocation strategy can grow a bit faster - than for lists because unlike lists the - over-allocation isn't permanent -- we reclaim - the excess before the end of this routine. - So, grow by ten and then add 25%. - */ - newn += 10u; - newn += newn >> 2; - if (newn > PY_SSIZE_T_MAX) { - /* Check for overflow */ - PyErr_NoMemory(); - Py_DECREF(item); - goto Fail; + if (PyErr_Occurred()) { + goto fail; } - n = (Py_ssize_t)newn; - if (_PyTuple_Resize(&result, n) != 0) { - Py_DECREF(item); - goto Fail; + Py_DECREF(it); + return _PyTuple_FromArraySteal(buffer, n); + } + buffer[n] = item; + } + PyListObject *list = (PyListObject *)PyList_New(16); + if (list == NULL) { + goto fail; + } + assert(n == 8); + Py_SET_SIZE(list, n); + for (Py_ssize_t j = 0; j < n; j++) { + PyList_SET_ITEM(list, j, buffer[j]); + } + for (;;) { + PyObject *item = PyIter_Next(it); + if (item == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(list); + Py_DECREF(it); + return NULL; } + break; + } + if (_PyList_AppendTakeRef(list, item) < 0) { + Py_DECREF(list); + Py_DECREF(it); + return NULL; } - PyTuple_SET_ITEM(result, j, item); } - - /* Cut tuple back if guess was too large. */ - if (j < n && - _PyTuple_Resize(&result, j) != 0) - goto Fail; - Py_DECREF(it); - return result; - -Fail: - Py_XDECREF(result); + PyObject *res = _PyList_AsTupleAndClear(list); + Py_DECREF(list); + return res; +fail: Py_DECREF(it); + while (n > 0) { + n--; + Py_DECREF(buffer[n]); + } return NULL; } diff --git a/Objects/listobject.c b/Objects/listobject.c index 3832295600a0ab..a877bad66be45f 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3174,6 +3174,25 @@ PyList_AsTuple(PyObject *v) return ret; } +PyObject * +_PyList_AsTupleAndClear(PyListObject *self) +{ + assert(self != NULL); + PyObject *ret; + if (self->ob_item == NULL) { + return PyTuple_New(0); + } + Py_BEGIN_CRITICAL_SECTION(self); + PyObject **items = self->ob_item; + Py_ssize_t size = Py_SIZE(self); + self->ob_item = NULL; + Py_SET_SIZE(self, 0); + ret = _PyTuple_FromArraySteal(items, size); + free_list_items(items, false); + Py_END_CRITICAL_SECTION(); + return ret; +} + PyObject * _PyList_FromStackRefSteal(const _PyStackRef *src, Py_ssize_t n) { From b0f278ff0551b06191cec01445c577e3b25570da Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 11 Dec 2024 16:06:07 +0100 Subject: [PATCH 18/46] gh-127065: Make methodcaller thread-safe and re-entrant (GH-127746) The function `operator.methodcaller` was not thread-safe since the additional of the vectorcall method in gh-89013. In the free threading build the issue is easy to trigger, for the normal build harder. This makes the `methodcaller` safe by: * Replacing the lazy initialization with initialization in the constructor. * Using a stack allocated space for the vectorcall arguments and falling back to `tp_call` for calls with more than 8 arguments. 
--- .../test_free_threading/test_methodcaller.py | 33 ++++ Lib/test/test_operator.py | 13 ++ ...-12-01-22-28-41.gh-issue-127065.tFpRer.rst | 1 + Modules/_operator.c | 180 ++++++++---------- 4 files changed, 131 insertions(+), 96 deletions(-) create mode 100644 Lib/test/test_free_threading/test_methodcaller.py create mode 100644 Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst diff --git a/Lib/test/test_free_threading/test_methodcaller.py b/Lib/test/test_free_threading/test_methodcaller.py new file mode 100644 index 00000000000000..8846b0010012f2 --- /dev/null +++ b/Lib/test/test_free_threading/test_methodcaller.py @@ -0,0 +1,33 @@ +import unittest +from threading import Thread +from test.support import threading_helper +from operator import methodcaller + + +class TestMethodcaller(unittest.TestCase): + def test_methodcaller_threading(self): + number_of_threads = 10 + size = 4_000 + + mc = methodcaller("append", 2) + + def work(mc, l, ii): + for _ in range(ii): + mc(l) + + worker_threads = [] + lists = [] + for ii in range(number_of_threads): + l = [] + lists.append(l) + worker_threads.append(Thread(target=work, args=[mc, l, size])) + for t in worker_threads: + t.start() + for t in worker_threads: + t.join() + for l in lists: + assert len(l) == size + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py index 812d46482e238a..82578a0ef1e6f2 100644 --- a/Lib/test/test_operator.py +++ b/Lib/test/test_operator.py @@ -482,6 +482,8 @@ def bar(self, f=42): return f def baz(*args, **kwds): return kwds['name'], kwds['self'] + def return_arguments(self, *args, **kwds): + return args, kwds a = A() f = operator.methodcaller('foo') self.assertRaises(IndexError, f, a) @@ -498,6 +500,17 @@ def baz(*args, **kwds): f = operator.methodcaller('baz', name='spam', self='eggs') self.assertEqual(f(a), ('spam', 'eggs')) + many_positional_arguments = tuple(range(10)) + many_kw_arguments = dict(zip('abcdefghij', range(10))) + f = operator.methodcaller('return_arguments', *many_positional_arguments) + self.assertEqual(f(a), (many_positional_arguments, {})) + + f = operator.methodcaller('return_arguments', **many_kw_arguments) + self.assertEqual(f(a), ((), many_kw_arguments)) + + f = operator.methodcaller('return_arguments', *many_positional_arguments, **many_kw_arguments) + self.assertEqual(f(a), (many_positional_arguments, many_kw_arguments)) + def test_inplace(self): operator = self.module class C(object): diff --git a/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst b/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst new file mode 100644 index 00000000000000..03d6953b9ddfa1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst @@ -0,0 +1 @@ +Make :func:`operator.methodcaller` thread-safe and re-entrant safe. 
diff --git a/Modules/_operator.c b/Modules/_operator.c index 6c1945174ab7cd..ce3ef015710223 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1595,78 +1595,75 @@ static PyType_Spec attrgetter_type_spec = { typedef struct { PyObject_HEAD PyObject *name; - PyObject *xargs; // reference to arguments passed in constructor + PyObject *args; PyObject *kwds; - PyObject **vectorcall_args; /* Borrowed references */ + PyObject *vectorcall_args; PyObject *vectorcall_kwnames; vectorcallfunc vectorcall; } methodcallerobject; -#ifndef Py_GIL_DISABLED -static int _methodcaller_initialize_vectorcall(methodcallerobject* mc) -{ - PyObject* args = mc->xargs; - PyObject* kwds = mc->kwds; - - Py_ssize_t nargs = PyTuple_GET_SIZE(args); - assert(nargs > 0); - mc->vectorcall_args = PyMem_Calloc( - nargs + (kwds ? PyDict_Size(kwds) : 0), - sizeof(PyObject*)); - if (!mc->vectorcall_args) { - PyErr_NoMemory(); - return -1; - } - /* The first item of vectorcall_args will be filled with obj later */ - if (nargs > 1) { - memcpy(mc->vectorcall_args, PySequence_Fast_ITEMS(args), - nargs * sizeof(PyObject*)); - } - if (kwds) { - const Py_ssize_t nkwds = PyDict_Size(kwds); - - mc->vectorcall_kwnames = PyTuple_New(nkwds); - if (!mc->vectorcall_kwnames) { - return -1; - } - Py_ssize_t i = 0, ppos = 0; - PyObject* key, * value; - while (PyDict_Next(kwds, &ppos, &key, &value)) { - PyTuple_SET_ITEM(mc->vectorcall_kwnames, i, Py_NewRef(key)); - mc->vectorcall_args[nargs + i] = value; // borrowed reference - ++i; - } - } - else { - mc->vectorcall_kwnames = NULL; - } - return 1; -} +#define _METHODCALLER_MAX_ARGS 8 static PyObject * -methodcaller_vectorcall( - methodcallerobject *mc, PyObject *const *args, size_t nargsf, PyObject* kwnames) +methodcaller_vectorcall(methodcallerobject *mc, PyObject *const *args, + size_t nargsf, PyObject* kwnames) { if (!_PyArg_CheckPositional("methodcaller", PyVectorcall_NARGS(nargsf), 1, 1) || !_PyArg_NoKwnames("methodcaller", kwnames)) { return NULL; } - if (mc->vectorcall_args == NULL) { - if (_methodcaller_initialize_vectorcall(mc) < 0) { - return NULL; - } - } + assert(mc->vectorcall_args != NULL); + + PyObject *tmp_args[_METHODCALLER_MAX_ARGS]; + tmp_args[0] = args[0]; + assert(1 + PyTuple_GET_SIZE(mc->vectorcall_args) <= _METHODCALLER_MAX_ARGS); + memcpy(tmp_args + 1, _PyTuple_ITEMS(mc->vectorcall_args), sizeof(PyObject *) * PyTuple_GET_SIZE(mc->vectorcall_args)); - assert(mc->vectorcall_args != 0); - mc->vectorcall_args[0] = args[0]; - return PyObject_VectorcallMethod( - mc->name, mc->vectorcall_args, - (PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET, + return PyObject_VectorcallMethod(mc->name, tmp_args, + (1 + PyTuple_GET_SIZE(mc->args)) | PY_VECTORCALL_ARGUMENTS_OFFSET, mc->vectorcall_kwnames); } -#endif +static int +_methodcaller_initialize_vectorcall(methodcallerobject* mc) +{ + PyObject* args = mc->args; + PyObject* kwds = mc->kwds; + + if (kwds && PyDict_Size(kwds)) { + PyObject *values = PyDict_Values(kwds); + if (!values) { + return -1; + } + PyObject *values_tuple = PySequence_Tuple(values); + Py_DECREF(values); + if (!values_tuple) { + return -1; + } + if (PyTuple_GET_SIZE(args)) { + mc->vectorcall_args = PySequence_Concat(args, values_tuple); + Py_DECREF(values_tuple); + if (mc->vectorcall_args == NULL) { + return -1; + } + } + else { + mc->vectorcall_args = values_tuple; + } + mc->vectorcall_kwnames = PySequence_Tuple(kwds); + if (!mc->vectorcall_kwnames) { + return -1; + } + } + else { + mc->vectorcall_args = Py_NewRef(args); + 
mc->vectorcall_kwnames = NULL; + } + + mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; + return 0; +} /* AC 3.5: variable number of arguments, not currently support by AC */ static PyObject * @@ -1694,25 +1691,30 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (mc == NULL) { return NULL; } + mc->vectorcall = NULL; + mc->vectorcall_args = NULL; + mc->vectorcall_kwnames = NULL; + mc->kwds = Py_XNewRef(kwds); Py_INCREF(name); PyInterpreterState *interp = _PyInterpreterState_GET(); _PyUnicode_InternMortal(interp, &name); mc->name = name; - mc->xargs = Py_XNewRef(args); // allows us to use borrowed references - mc->kwds = Py_XNewRef(kwds); - mc->vectorcall_args = 0; - + mc->args = PyTuple_GetSlice(args, 1, PyTuple_GET_SIZE(args)); + if (mc->args == NULL) { + Py_DECREF(mc); + return NULL; + } -#ifdef Py_GIL_DISABLED - // gh-127065: The current implementation of methodcaller_vectorcall - // is not thread-safe because it modifies the `vectorcall_args` array, - // which is shared across calls. - mc->vectorcall = NULL; -#else - mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; -#endif + Py_ssize_t vectorcall_size = PyTuple_GET_SIZE(args) + + (kwds ? PyDict_Size(kwds) : 0); + if (vectorcall_size < (_METHODCALLER_MAX_ARGS)) { + if (_methodcaller_initialize_vectorcall(mc) < 0) { + Py_DECREF(mc); + return NULL; + } + } PyObject_GC_Track(mc); return (PyObject *)mc; @@ -1722,13 +1724,10 @@ static void methodcaller_clear(methodcallerobject *mc) { Py_CLEAR(mc->name); - Py_CLEAR(mc->xargs); + Py_CLEAR(mc->args); Py_CLEAR(mc->kwds); - if (mc->vectorcall_args != NULL) { - PyMem_Free(mc->vectorcall_args); - mc->vectorcall_args = 0; - Py_CLEAR(mc->vectorcall_kwnames); - } + Py_CLEAR(mc->vectorcall_args); + Py_CLEAR(mc->vectorcall_kwnames); } static void @@ -1745,8 +1744,10 @@ static int methodcaller_traverse(methodcallerobject *mc, visitproc visit, void *arg) { Py_VISIT(mc->name); - Py_VISIT(mc->xargs); + Py_VISIT(mc->args); Py_VISIT(mc->kwds); + Py_VISIT(mc->vectorcall_args); + Py_VISIT(mc->vectorcall_kwnames); Py_VISIT(Py_TYPE(mc)); return 0; } @@ -1765,15 +1766,7 @@ methodcaller_call(methodcallerobject *mc, PyObject *args, PyObject *kw) if (method == NULL) return NULL; - - PyObject *cargs = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); - if (cargs == NULL) { - Py_DECREF(method); - return NULL; - } - - result = PyObject_Call(method, cargs, mc->kwds); - Py_DECREF(cargs); + result = PyObject_Call(method, mc->args, mc->kwds); Py_DECREF(method); return result; } @@ -1791,7 +1784,7 @@ methodcaller_repr(methodcallerobject *mc) } numkwdargs = mc->kwds != NULL ? 
PyDict_GET_SIZE(mc->kwds) : 0; - numposargs = PyTuple_GET_SIZE(mc->xargs) - 1; + numposargs = PyTuple_GET_SIZE(mc->args); numtotalargs = numposargs + numkwdargs; if (numtotalargs == 0) { @@ -1807,7 +1800,7 @@ methodcaller_repr(methodcallerobject *mc) } for (i = 0; i < numposargs; ++i) { - PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->xargs, i+1)); + PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->args, i)); if (onerepr == NULL) goto done; PyTuple_SET_ITEM(argreprs, i, onerepr); @@ -1859,14 +1852,14 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) { if (!mc->kwds || PyDict_GET_SIZE(mc->kwds) == 0) { Py_ssize_t i; - Py_ssize_t newarg_size = PyTuple_GET_SIZE(mc->xargs); - PyObject *newargs = PyTuple_New(newarg_size); + Py_ssize_t callargcount = PyTuple_GET_SIZE(mc->args); + PyObject *newargs = PyTuple_New(1 + callargcount); if (newargs == NULL) return NULL; PyTuple_SET_ITEM(newargs, 0, Py_NewRef(mc->name)); - for (i = 1; i < newarg_size; ++i) { - PyObject *arg = PyTuple_GET_ITEM(mc->xargs, i); - PyTuple_SET_ITEM(newargs, i, Py_NewRef(arg)); + for (i = 0; i < callargcount; ++i) { + PyObject *arg = PyTuple_GET_ITEM(mc->args, i); + PyTuple_SET_ITEM(newargs, i + 1, Py_NewRef(arg)); } return Py_BuildValue("ON", Py_TYPE(mc), newargs); } @@ -1884,12 +1877,7 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) constructor = PyObject_VectorcallDict(partial, newargs, 2, mc->kwds); Py_DECREF(partial); - PyObject *args = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); - if (!args) { - Py_DECREF(constructor); - return NULL; - } - return Py_BuildValue("NO", constructor, args); + return Py_BuildValue("NO", constructor, mc->args); } } From dd9da738ad1d420fabafaded3fe63912b2b17cfb Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 11 Dec 2024 11:28:44 -0500 Subject: [PATCH 19/46] gh-118915: C API: Document frame locals proxies. (#127720) Co-authored-by: Alex Waygood --- Doc/c-api/frame.rst | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/frame.rst b/Doc/c-api/frame.rst index 638a740e0c24da..1a52e146a69751 100644 --- a/Doc/c-api/frame.rst +++ b/Doc/c-api/frame.rst @@ -132,7 +132,7 @@ See also :ref:`Reflection `. .. versionadded:: 3.11 .. versionchanged:: 3.13 - As part of :pep:`667`, return a proxy object for optimized scopes. + As part of :pep:`667`, return an instance of :c:var:`PyFrameLocalsProxy_Type`. .. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame) @@ -140,6 +140,26 @@ See also :ref:`Reflection `. Return the line number that *frame* is currently executing. +Frame Locals Proxies +^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 3.13 + +The :attr:`~frame.f_locals` attribute on a :ref:`frame object ` +is an instance of a "frame-locals proxy". The proxy object exposes a +write-through view of the underlying locals dictionary for the frame. This +ensures that the variables exposed by ``f_locals`` are always up to date with +the live local variables in the frame itself. + +See :pep:`667` for more information. + +.. c:var:: PyTypeObject PyFrameLocalsProxy_Type + + The type of frame :func:`locals` proxy objects. + +.. c:function:: int PyFrameLocalsProxy_Check(PyObject *obj) + + Return non-zero if *obj* is a frame :func:`locals` proxy. Internal Frames ^^^^^^^^^^^^^^^ From bc262de06b10a2d119c28bac75060bf00301697a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 11 Dec 2024 17:37:38 +0000 Subject: [PATCH 20/46] GH-125174: Mark objects as statically allocated. 
(#127797) * Set a bit in the unused part of the refcount on 64 bit machines and the free-threaded build. * Use the top of the refcount range on 32 bit machines --- Include/internal/pycore_object.h | 16 ++++++++++++- Include/object.h | 20 ++++++++++++---- Include/refcount.h | 36 +++++++++++++++++++++++++---- Lib/test/test_builtin.py | 2 +- Lib/test/test_capi/test_immortal.py | 16 +++++++++++++ Modules/_testinternalcapi.c | 10 ++++++++ Objects/object.c | 12 +++++++++- 7 files changed, 99 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 6b0b464a6fdb96..22de3c9d4e32ea 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -73,14 +73,24 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); #define _PyObject_HEAD_INIT(type) \ { \ .ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL, \ + .ob_flags = _Py_STATICALLY_ALLOCATED_FLAG, \ .ob_type = (type) \ } #else +#if SIZEOF_VOID_P > 4 #define _PyObject_HEAD_INIT(type) \ { \ - .ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT, \ + .ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT, \ + .ob_flags = _Py_STATICALLY_ALLOCATED_FLAG, \ .ob_type = (type) \ } +#else +#define _PyObject_HEAD_INIT(type) \ + { \ + .ob_refcnt = _Py_STATIC_IMMORTAL_INITIAL_REFCNT, \ + .ob_type = (type) \ + } +#endif #endif #define _PyVarObject_HEAD_INIT(type, size) \ { \ @@ -127,7 +137,11 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n) _Py_AddRefTotal(_PyThreadState_GET(), n); #endif #if !defined(Py_GIL_DISABLED) +#if SIZEOF_VOID_P > 4 + op->ob_refcnt += (PY_UINT32_T)n; +#else op->ob_refcnt += n; +#endif #else if (_Py_IsOwnedByCurrentThread(op)) { uint32_t local = op->ob_ref_local; diff --git a/Include/object.h b/Include/object.h index 3876d8449afbe2..da7b3668c033f4 100644 --- a/Include/object.h +++ b/Include/object.h @@ -71,7 +71,7 @@ whose size is determined when the object is allocated. #define PyObject_HEAD_INIT(type) \ { \ 0, \ - 0, \ + _Py_STATICALLY_ALLOCATED_FLAG, \ { 0 }, \ 0, \ _Py_IMMORTAL_REFCNT_LOCAL, \ @@ -81,7 +81,7 @@ whose size is determined when the object is allocated. #else #define PyObject_HEAD_INIT(type) \ { \ - { _Py_IMMORTAL_INITIAL_REFCNT }, \ + { _Py_STATIC_IMMORTAL_INITIAL_REFCNT }, \ (type) \ }, #endif @@ -120,9 +120,19 @@ struct _object { __pragma(warning(disable: 4201)) #endif union { - Py_ssize_t ob_refcnt; #if SIZEOF_VOID_P > 4 - PY_UINT32_T ob_refcnt_split[2]; + PY_INT64_T ob_refcnt_full; /* This field is needed for efficient initialization with Clang on ARM */ + struct { +# if PY_BIG_ENDIAN + PY_UINT32_T ob_flags; + PY_UINT32_T ob_refcnt; +# else + PY_UINT32_T ob_refcnt; + PY_UINT32_T ob_flags; +# endif + }; +#else + Py_ssize_t ob_refcnt; #endif }; #ifdef _MSC_VER @@ -142,7 +152,7 @@ struct _object { // trashcan mechanism as a linked list pointer and by the GC to store the // computed "gc_refs" refcount. uintptr_t ob_tid; - uint16_t _padding; + uint16_t ob_flags; PyMutex ob_mutex; // per-object lock uint8_t ob_gc_bits; // gc-related state uint32_t ob_ref_local; // local reference count diff --git a/Include/refcount.h b/Include/refcount.h index 141cbd34dd72e6..6908c426141378 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -19,6 +19,9 @@ immortal. The latter should be the only instances that require cleanup during runtime finalization. 
*/ +/* Leave the low bits for refcount overflow for old stable ABI code */ +#define _Py_STATICALLY_ALLOCATED_FLAG (1 << 7) + #if SIZEOF_VOID_P > 4 /* In 64+ bit systems, any object whose 32 bit reference count is >= 2**31 @@ -39,7 +42,8 @@ beyond the refcount limit. Immortality checks for reference count decreases will be done by checking the bit sign flag in the lower 32 bits. */ -#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3UL << 30)) +#define _Py_IMMORTAL_INITIAL_REFCNT (3UL << 30) +#define _Py_STATIC_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(_Py_IMMORTAL_INITIAL_REFCNT | (((Py_ssize_t)_Py_STATICALLY_ALLOCATED_FLAG) << 32))) #else /* @@ -54,8 +58,10 @@ immortality, but the execution would still be correct. Reference count increases and decreases will first go through an immortality check by comparing the reference count field to the minimum immortality refcount. */ -#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3L << 29)) +#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(5L << 28)) #define _Py_IMMORTAL_MINIMUM_REFCNT ((Py_ssize_t)(1L << 30)) +#define _Py_STATIC_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(7L << 28)) +#define _Py_STATIC_IMMORTAL_MINIMUM_REFCNT ((Py_ssize_t)(6L << 28)) #endif // Py_GIL_DISABLED builds indicate immortal objects using `ob_ref_local`, which is @@ -123,10 +129,21 @@ static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) #define _Py_IsImmortal(op) _Py_IsImmortal(_PyObject_CAST(op)) +static inline Py_ALWAYS_INLINE int _Py_IsStaticImmortal(PyObject *op) +{ +#if defined(Py_GIL_DISABLED) || SIZEOF_VOID_P > 4 + return (op->ob_flags & _Py_STATICALLY_ALLOCATED_FLAG) != 0; +#else + return op->ob_refcnt >= _Py_STATIC_IMMORTAL_MINIMUM_REFCNT; +#endif +} +#define _Py_IsStaticImmortal(op) _Py_IsStaticImmortal(_PyObject_CAST(op)) + // Py_SET_REFCNT() implementation for stable ABI PyAPI_FUNC(void) _Py_SetRefcnt(PyObject *ob, Py_ssize_t refcnt); static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { + assert(refcnt >= 0); #if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030d0000 // Stable ABI implements Py_SET_REFCNT() as a function call // on limited C API version 3.13 and newer. 
@@ -139,9 +156,12 @@ static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { if (_Py_IsImmortal(ob)) { return; } - #ifndef Py_GIL_DISABLED +#if SIZEOF_VOID_P > 4 + ob->ob_refcnt = (PY_UINT32_T)refcnt; +#else ob->ob_refcnt = refcnt; +#endif #else if (_Py_IsOwnedByCurrentThread(ob)) { if ((size_t)refcnt > (size_t)UINT32_MAX) { @@ -252,13 +272,13 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); } #elif SIZEOF_VOID_P > 4 - PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; + PY_UINT32_T cur_refcnt = op->ob_refcnt; if (((int32_t)cur_refcnt) < 0) { // the object is immortal _Py_INCREF_IMMORTAL_STAT_INC(); return; } - op->ob_refcnt_split[PY_BIG_ENDIAN] = cur_refcnt + 1; + op->ob_refcnt = cur_refcnt + 1; #else if (_Py_IsImmortal(op)) { _Py_INCREF_IMMORTAL_STAT_INC(); @@ -354,7 +374,13 @@ static inline void Py_DECREF(PyObject *op) #elif defined(Py_REF_DEBUG) static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) { +#if SIZEOF_VOID_P > 4 + /* If an object has been freed, it will have a negative full refcnt + * If it has not it been freed, will have a very large refcnt */ + if (op->ob_refcnt_full <= 0 || op->ob_refcnt > (UINT32_MAX - (1<<20))) { +#else if (op->ob_refcnt <= 0) { +#endif _Py_NegativeRefcount(filename, lineno, op); } if (_Py_IsImmortal(op)) { diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index e51711d9b4f1a4..06df217881a52f 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2691,7 +2691,7 @@ def __del__(self): class ImmortalTests(unittest.TestCase): if sys.maxsize < (1 << 32): - IMMORTAL_REFCOUNT = 3 << 29 + IMMORTAL_REFCOUNT = 7 << 28 else: IMMORTAL_REFCOUNT = 3 << 30 diff --git a/Lib/test/test_capi/test_immortal.py b/Lib/test/test_capi/test_immortal.py index ef5d32b7f01935..3e36913ac301c3 100644 --- a/Lib/test/test_capi/test_immortal.py +++ b/Lib/test/test_capi/test_immortal.py @@ -2,6 +2,7 @@ from test.support import import_helper _testcapi = import_helper.import_module('_testcapi') +_testinternalcapi = import_helper.import_module('_testinternalcapi') class TestCAPI(unittest.TestCase): @@ -11,6 +12,21 @@ def test_immortal_builtins(self): def test_immortal_small_ints(self): _testcapi.test_immortal_small_ints() +class TestInternalCAPI(unittest.TestCase): + + def test_immortal_builtins(self): + for obj in range(-5, 256): + self.assertTrue(_testinternalcapi.is_static_immortal(obj)) + self.assertTrue(_testinternalcapi.is_static_immortal(None)) + self.assertTrue(_testinternalcapi.is_static_immortal(False)) + self.assertTrue(_testinternalcapi.is_static_immortal(True)) + self.assertTrue(_testinternalcapi.is_static_immortal(...)) + self.assertTrue(_testinternalcapi.is_static_immortal(())) + for obj in range(300, 400): + self.assertFalse(_testinternalcapi.is_static_immortal(obj)) + for obj in ([], {}, set()): + self.assertFalse(_testinternalcapi.is_static_immortal(obj)) + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 288daf09a5fe5c..014f89997f7f60 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2049,6 +2049,15 @@ get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); } +static PyObject * +is_static_immortal(PyObject *self, PyObject *op) +{ + if (_Py_IsStaticImmortal(op)) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + static PyMethodDef 
module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -2146,6 +2155,7 @@ static PyMethodDef module_functions[] = { {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, {"has_deferred_refcount", has_deferred_refcount, METH_O}, {"get_tracked_heap_size", get_tracked_heap_size, METH_NOARGS}, + {"is_static_immortal", is_static_immortal, METH_O}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/object.c b/Objects/object.c index 74f47fa4239032..c64675b5e1d6c2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2475,10 +2475,16 @@ new_reference(PyObject *op) { // Skip the immortal object check in Py_SET_REFCNT; always set refcnt to 1 #if !defined(Py_GIL_DISABLED) +#if SIZEOF_VOID_P > 4 + op->ob_refcnt_full = 1; + assert(op->ob_refcnt == 1); + assert(op->ob_flags == 0); +#else op->ob_refcnt = 1; +#endif #else op->ob_tid = _Py_ThreadId(); - op->_padding = 0; + op->ob_flags = 0; op->ob_mutex = (PyMutex){ 0 }; op->ob_gc_bits = 0; op->ob_ref_local = 1; @@ -2515,6 +2521,10 @@ _Py_SetImmortalUntracked(PyObject *op) || PyUnicode_CHECK_INTERNED(op) == SSTATE_INTERNED_IMMORTAL_STATIC); } #endif + // Check if already immortal to avoid degrading from static immortal to plain immortal + if (_Py_IsImmortal(op)) { + return; + } #ifdef Py_GIL_DISABLED op->ob_tid = _Py_UNOWNED_TID; op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; From e8f4e272cc828f2b79fa17fc6b9786bdddab7ce4 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Wed, 11 Dec 2024 19:32:54 +0100 Subject: [PATCH 21/46] gh-111609: Test `end_offset` in SyntaxError subclass (#127830) Test `end_offset` in SyntaxError subclass --- Lib/test/test_exceptions.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 5beeac3adfc065..6ccfa9575f8569 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -2274,6 +2274,21 @@ def test_range_of_offsets(self): self.assertIn(expected, err.getvalue()) the_exception = exc + def test_subclass(self): + class MySyntaxError(SyntaxError): + pass + + try: + raise MySyntaxError("bad bad", ("bad.py", 1, 2, "abcdefg", 1, 7)) + except SyntaxError as exc: + with support.captured_stderr() as err: + sys.__excepthook__(*sys.exc_info()) + self.assertIn(""" + File "bad.py", line 1 + abcdefg + ^^^^^ +""", err.getvalue()) + def test_encodings(self): self.addCleanup(unlink, TESTFN) source = ( From c84928ed6de105696be24859e03f3ab27e11daf6 Mon Sep 17 00:00:00 2001 From: mpage Date: Wed, 11 Dec 2024 15:18:22 -0800 Subject: [PATCH 22/46] gh-115999: Specialize `CALL_KW` in free-threaded builds (#127713) * Enable specialization of CALL_KW * Fix bug pushing frame in _PY_FRAME_KW `_PY_FRAME_KW` pushes a pointer to the new frame onto the stack for consumption by the next uop. When pushing the frame fails, we do not want to push the result, `NULL`, to the stack because it is not a valid stackref. This works in the default build because `PyStackRef_NULL` and `NULL` are the same value, so the `PyStackRef_XCLOSE()` in the error handler ignores it. In the free-threaded build the values are not the same; `PyStackRef_XCLOSE()` will attempt to decref a null pointer. 
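For reference, CALL_KW is the instruction emitted for calls that pass keyword arguments. A small illustrative snippet (assuming a current main/3.13+ interpreter, where the disassembler shows this opcode) makes it easy to see which Python-level calls the specialization applies to:

    import dis

    def demo(f, x):
        return f(x, flag=True)   # keyword call: compiled to CALL_KW

    dis.dis(demo)                # the call shows up as CALL_KW in the output
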
--- Python/bytecodes.c | 9 +++++---- Python/executor_cases.c.h | 11 +++++++---- Python/generated_cases.c.h | 24 ++++++++++-------------- Python/specialize.c | 17 ++++------------- 4 files changed, 26 insertions(+), 35 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3d280941b35244..d0e4c2bc45489b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4311,7 +4311,7 @@ dummy_func( assert(Py_TYPE(callable_o) == &PyFunction_Type); int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -4319,9 +4319,10 @@ dummy_func( // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. SYNC_SP(); - if (new_frame == NULL) { + if (temp == NULL) { ERROR_NO_POP(); } + new_frame = temp; } op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable[1], self_or_null[1], unused[oparg], kwnames -- callable[1], self_or_null[1], unused[oparg], kwnames)) { @@ -4372,7 +4373,7 @@ dummy_func( _PUSH_FRAME; specializing op(_SPECIALIZE_CALL_KW, (counter/1, callable[1], self_or_null[1], args[oparg], kwnames -- callable[1], self_or_null[1], args[oparg], kwnames)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_CallKw(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0])); @@ -4380,7 +4381,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CALL_KW); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } macro(CALL_KW) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 987ff2e6419669..18f19773d25c90 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5235,7 +5235,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -5243,12 +5243,15 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. 
- stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { + if (temp == NULL) { JUMP_TO_ERROR(); } + new_frame = temp; + stack_pointer[0].bits = (uintptr_t)new_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 33f32aba1e5145..fc0f55555f5c36 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1895,7 +1895,7 @@ callable = &stack_pointer[-3 - oparg]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1905,7 +1905,7 @@ } OPCODE_DEFERRED_INC(CALL_KW); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 2 cache entries */ // _MAYBE_EXPAND_METHOD_KW @@ -2093,7 +2093,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -2101,12 +2101,12 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { + if (temp == NULL) { goto error; } + new_frame = temp; } // _SAVE_RETURN_OFFSET { @@ -2123,8 +2123,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -2271,7 +2269,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -2279,12 +2277,12 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { + if (temp == NULL) { goto error; } + new_frame = temp; } // _SAVE_RETURN_OFFSET { @@ -2301,8 +2299,6 @@ // Eventually this should be the only occurrence of this code. 
assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); diff --git a/Python/specialize.c b/Python/specialize.c index d3fea717243847..fd182e7d7a9215 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2107,7 +2107,7 @@ specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, return -1; } write_u32(cache->func_version, version); - instr->op.code = bound_method ? CALL_KW_BOUND_METHOD : CALL_KW_PY; + specialize(instr, bound_method ? CALL_KW_BOUND_METHOD : CALL_KW_PY); return 0; } @@ -2202,10 +2202,9 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) { PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CALL_KW] == INLINE_CACHE_ENTRIES_CALL_KW); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL_KW); - _PyCallCache *cache = (_PyCallCache *)(instr + 1); int fail; if (PyFunction_Check(callable)) { fail = specialize_py_call_kw((PyFunctionObject *)callable, instr, nargs, false); @@ -2221,19 +2220,11 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) } } else { - instr->op.code = CALL_KW_NON_PY; + specialize(instr, CALL_KW_NON_PY); fail = 0; } if (fail) { - STAT_INC(CALL, failure); - assert(!PyErr_Occurred()); - instr->op.code = CALL_KW; - cache->counter = adaptive_counter_backoff(cache->counter); - } - else { - STAT_INC(CALL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } } From 41f29e5d16c314790559e563ce5ca0334fcd54df Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 12 Dec 2024 02:11:00 +0100 Subject: [PATCH 23/46] gh-127146: Some expected failures in Emscripten time tests (#127843) Disables two tests in the test_time suite, and adjusts test_os to reflect precision limits in Emscripten. --- Lib/test/test_os.py | 28 +++++++++++++++++++++------- Lib/test/test_time.py | 4 ++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index f3d2ceb263f6f4..8aac92934f6ac0 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -805,14 +805,28 @@ def _test_utime(self, set_time, filename=None): set_time(filename, (atime_ns, mtime_ns)) st = os.stat(filename) - if support_subsecond: - self.assertAlmostEqual(st.st_atime, atime_ns * 1e-9, delta=1e-6) - self.assertAlmostEqual(st.st_mtime, mtime_ns * 1e-9, delta=1e-6) + if support.is_emscripten: + # Emscripten timestamps are roundtripped through a 53 bit integer of + # nanoseconds. If we want to represent ~50 years which is an 11 + # digits number of seconds: + # 2*log10(60) + log10(24) + log10(365) + log10(60) + log10(50) + # is about 11. Because 53 * log10(2) is about 16, we only have 5 + # digits worth of sub-second precision. + # Some day it would be good to fix this upstream. 
+ delta=1e-5 + self.assertAlmostEqual(st.st_atime, atime_ns * 1e-9, delta=1e-5) + self.assertAlmostEqual(st.st_mtime, mtime_ns * 1e-9, delta=1e-5) + self.assertAlmostEqual(st.st_atime_ns, atime_ns, delta=1e9 * 1e-5) + self.assertAlmostEqual(st.st_mtime_ns, mtime_ns, delta=1e9 * 1e-5) else: - self.assertEqual(st.st_atime, atime_ns * 1e-9) - self.assertEqual(st.st_mtime, mtime_ns * 1e-9) - self.assertEqual(st.st_atime_ns, atime_ns) - self.assertEqual(st.st_mtime_ns, mtime_ns) + if support_subsecond: + self.assertAlmostEqual(st.st_atime, atime_ns * 1e-9, delta=1e-6) + self.assertAlmostEqual(st.st_mtime, mtime_ns * 1e-9, delta=1e-6) + else: + self.assertEqual(st.st_atime, atime_ns * 1e-9) + self.assertEqual(st.st_mtime, mtime_ns * 1e-9) + self.assertEqual(st.st_atime_ns, atime_ns) + self.assertEqual(st.st_mtime_ns, mtime_ns) def test_utime(self): def set_time(filename, ns): diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index d368f08b610870..92398300f26577 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -116,6 +116,7 @@ def test_clock_monotonic(self): 'need time.pthread_getcpuclockid()') @unittest.skipUnless(hasattr(time, 'clock_gettime'), 'need time.clock_gettime()') + @unittest.skipIf(support.is_emscripten, "Fails to find clock") def test_pthread_getcpuclockid(self): clk_id = time.pthread_getcpuclockid(threading.get_ident()) self.assertTrue(type(clk_id) is int) @@ -539,6 +540,9 @@ def test_perf_counter(self): @unittest.skipIf( support.is_wasi, "process_time not available on WASI" ) + @unittest.skipIf( + support.is_emscripten, "process_time present but doesn't exclude sleep" + ) def test_process_time(self): # process_time() should not include time spend during a sleep start = time.process_time() From c33b6fbf358c1bc14b20e14a1fffff62c6826ecd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Srinivas=20Reddy=20Thatiparthy=20=28=E0=B0=A4=E0=B0=BE?= =?UTF-8?q?=E0=B0=9F=E0=B0=BF=E0=B0=AA=E0=B0=B0=E0=B1=8D=E0=B0=A4=E0=B0=BF?= =?UTF-8?q?=20=E0=B0=B6=E0=B1=8D=E0=B0=B0=E0=B1=80=E0=B0=A8=E0=B0=BF?= =?UTF-8?q?=E0=B0=B5=E0=B0=BE=E0=B0=B8=E0=B1=8D=20=20=E0=B0=B0=E0=B1=86?= =?UTF-8?q?=E0=B0=A1=E0=B1=8D=E0=B0=A1=E0=B0=BF=29?= Date: Thu, 12 Dec 2024 07:48:12 +0530 Subject: [PATCH 24/46] gh-127740: Add some more tests for earlier PR #127756 (#127818) --- Lib/test/test_bytes.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 32cd178fa3b445..7bb1ab38aa4fdf 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -464,6 +464,10 @@ def test_fromhex(self): with self.assertRaises(ValueError) as cm: self.type2test.fromhex(value) self.assertIn("fromhex() arg must contain an even number of hexadecimal digits", str(cm.exception)) + for value, position in (("a ", 1), (" aa a ", 5), (" aa a a ", 5)): + with self.assertRaises(ValueError) as cm: + self.type2test.fromhex(value) + self.assertIn(f"non-hexadecimal number found in fromhex() arg at position {position}", str(cm.exception)) for data, pos in ( # invalid first hexadecimal character From 8bbd379ee30db0320ec3d31c37aee2a503902b0f Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 11 Dec 2024 21:24:56 -0600 Subject: [PATCH 25/46] Simplify and speed-up an itertools recipe (gh-127848) --- Doc/library/itertools.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 03966f3d3d694b..3b90d7830f3681 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -1015,7 +1015,7 @@ 
The following recipes have a more mathematical flavor: .. testcode:: def powerset(iterable): - "powerset([1,2,3]) → () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)" + # powerset([1,2,3]) → () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3) s = list(iterable) return chain.from_iterable(combinations(s, r) for r in range(len(s)+1)) @@ -1104,11 +1104,6 @@ The following recipes have a more mathematical flavor: data[p*p : n : p+p] = bytes(len(range(p*p, n, p+p))) yield from iter_index(data, 1, start=3) - def is_prime(n): - "Return True if n is prime." - # is_prime(1_000_000_000_000_403) → True - return n > 1 and all(n % p for p in sieve(math.isqrt(n) + 1)) - def factor(n): "Prime factors of n." # factor(99) → 3 3 11 @@ -1123,6 +1118,11 @@ The following recipes have a more mathematical flavor: if n > 1: yield n + def is_prime(n): + "Return True if n is prime." + # is_prime(1_000_000_000_000_403) → True + return n > 1 and next(factor(n)) == n + def totient(n): "Count of natural numbers up to n that are coprime to n." # https://mathworld.wolfram.com/TotientFunction.html From 292afd1d51dd7aacb12a6165f596ae7bb58c9ba8 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 12 Dec 2024 06:49:34 +0000 Subject: [PATCH 26/46] GH-127381: pathlib ABCs: remove remaining uncommon `PathBase` methods (#127714) Remove the following methods from `pathlib._abc.PathBase`: - `expanduser()` - `hardlink_to()` - `touch()` - `chmod()` - `lchmod()` - `owner()` - `group()` - `from_uri()` - `as_uri()` These operations aren't regularly supported in virtual filesystems, so they don't win a place in the `PathBase` interface. (Some of them probably don't deserve a place in `Path` :P.) They're quasi-abstract (except `lchmod()`), and they're not called by other `PathBase` methods. --- Lib/pathlib/_abc.py | 54 ----------------------- Lib/pathlib/_local.py | 28 +++++++++++- Lib/test/test_pathlib/test_pathlib_abc.py | 12 ----- 3 files changed, 27 insertions(+), 67 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 02c6e0500617aa..b10aba85132332 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -571,12 +571,6 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): yield path, dirnames, filenames paths += [path.joinpath(d) for d in reversed(dirnames)] - def expanduser(self): - """ Return a new path with expanded ~ and ~user constructs - (as returned by os.path.expanduser) - """ - raise UnsupportedOperation(self._unsupported_msg('expanduser()')) - def readlink(self): """ Return the path to which the symbolic link points. @@ -597,20 +591,6 @@ def _symlink_to_target_of(self, link): """ self.symlink_to(link.readlink()) - def hardlink_to(self, target): - """ - Make this path a hard link pointing to the same file as *target*. - - Note the order of arguments (self, target) is the reverse of os.link's. - """ - raise UnsupportedOperation(self._unsupported_msg('hardlink_to()')) - - def touch(self, mode=0o666, exist_ok=True): - """ - Create this file with the given access mode, if it doesn't exist. - """ - raise UnsupportedOperation(self._unsupported_msg('touch()')) - def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. @@ -729,37 +709,3 @@ def move_into(self, target_dir): else: target = self.with_segments(target_dir, name) return self.move(target) - - def chmod(self, mode, *, follow_symlinks=True): - """ - Change the permissions of the path, like os.chmod(). 
- """ - raise UnsupportedOperation(self._unsupported_msg('chmod()')) - - def lchmod(self, mode): - """ - Like chmod(), except if the path points to a symlink, the symlink's - permissions are changed, rather than its target's. - """ - self.chmod(mode, follow_symlinks=False) - - def owner(self, *, follow_symlinks=True): - """ - Return the login name of the file owner. - """ - raise UnsupportedOperation(self._unsupported_msg('owner()')) - - def group(self, *, follow_symlinks=True): - """ - Return the group name of the file gid. - """ - raise UnsupportedOperation(self._unsupported_msg('group()')) - - @classmethod - def from_uri(cls, uri): - """Return a new path from the given 'file' URI.""" - raise UnsupportedOperation(cls._unsupported_msg('from_uri()')) - - def as_uri(self): - """Return the path as a URI.""" - raise UnsupportedOperation(self._unsupported_msg('as_uri()')) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 85437ec80bfcc4..0dfe9d2390ecff 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -526,7 +526,6 @@ class Path(PathBase, PurePath): but cannot instantiate a WindowsPath on a POSIX system or vice versa. """ __slots__ = () - as_uri = PurePath.as_uri @classmethod def _unsupported_msg(cls, attribute): @@ -813,6 +812,12 @@ def owner(self, *, follow_symlinks=True): """ uid = self.stat(follow_symlinks=follow_symlinks).st_uid return pwd.getpwuid(uid).pw_name + else: + def owner(self, *, follow_symlinks=True): + """ + Return the login name of the file owner. + """ + raise UnsupportedOperation(self._unsupported_msg('owner()')) if grp: def group(self, *, follow_symlinks=True): @@ -821,6 +826,12 @@ def group(self, *, follow_symlinks=True): """ gid = self.stat(follow_symlinks=follow_symlinks).st_gid return grp.getgrgid(gid).gr_name + else: + def group(self, *, follow_symlinks=True): + """ + Return the group name of the file gid. + """ + raise UnsupportedOperation(self._unsupported_msg('group()')) if hasattr(os, "readlink"): def readlink(self): @@ -892,6 +903,13 @@ def chmod(self, mode, *, follow_symlinks=True): """ os.chmod(self, mode, follow_symlinks=follow_symlinks) + def lchmod(self, mode): + """ + Like chmod(), except if the path points to a symlink, the symlink's + permissions are changed, rather than its target's. + """ + self.chmod(mode, follow_symlinks=False) + def unlink(self, missing_ok=False): """ Remove this file or link. @@ -988,6 +1006,14 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ os.link(target, self) + else: + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. 
+ """ + raise UnsupportedOperation(self._unsupported_msg('hardlink_to()')) def expanduser(self): """ Return a new path with expanded ~ and ~user constructs diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index f4c364c6fe5109..d770b87dc6a104 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1312,21 +1312,9 @@ def test_unsupported_operation(self): self.assertRaises(e, lambda: list(p.glob('*'))) self.assertRaises(e, lambda: list(p.rglob('*'))) self.assertRaises(e, lambda: list(p.walk())) - self.assertRaises(e, p.expanduser) self.assertRaises(e, p.readlink) self.assertRaises(e, p.symlink_to, 'foo') - self.assertRaises(e, p.hardlink_to, 'foo') self.assertRaises(e, p.mkdir) - self.assertRaises(e, p.touch) - self.assertRaises(e, p.chmod, 0o755) - self.assertRaises(e, p.lchmod, 0o755) - self.assertRaises(e, p.owner) - self.assertRaises(e, p.group) - self.assertRaises(e, p.as_uri) - - def test_as_uri_common(self): - e = UnsupportedOperation - self.assertRaises(e, self.cls('').as_uri) def test_fspath_common(self): self.assertRaises(TypeError, os.fspath, self.cls('')) From 487fdbed40734fd7721457c6f6ffeca03da0b0e7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 12 Dec 2024 11:22:20 +0000 Subject: [PATCH 27/46] GH-125174: Fix compiler warning (GH-127860) Fix compiler warning --- Include/internal/pycore_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 22de3c9d4e32ea..668ea47ca727e2 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -184,7 +184,7 @@ PyAPI_FUNC(void) _Py_SetImmortalUntracked(PyObject *op); // Makes an immortal object mortal again with the specified refcnt. Should only // be used during runtime finalization. -static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt) +static inline void _Py_SetMortal(PyObject *op, short refcnt) { if (op) { assert(_Py_IsImmortal(op)); From 7146f1894638130940944d4808dae7d144d46227 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 12 Dec 2024 17:39:24 +0000 Subject: [PATCH 28/46] GH-127807: pathlib ABCs: remove `PathBase._unsupported_msg()` (#127855) This method helped us customise the `UnsupportedOperation` message depending on the type. But we're aiming to make `PathBase` a proper ABC soon, so `NotImplementedError` is the right exception to raise there. --- Lib/pathlib/__init__.py | 4 +-- Lib/pathlib/_abc.py | 35 +++++++-------------- Lib/pathlib/_local.py | 37 ++++++++++++++++++----- Lib/test/test_pathlib/test_pathlib.py | 8 +++++ Lib/test/test_pathlib/test_pathlib_abc.py | 12 ++------ 5 files changed, 52 insertions(+), 44 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 5da3acd31997e5..ec1bac9ef49350 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -5,8 +5,6 @@ operating systems. """ -from pathlib._abc import * from pathlib._local import * -__all__ = (_abc.__all__ + - _local.__all__) +__all__ = _local.__all__ diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b10aba85132332..b4560295300c28 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -20,15 +20,6 @@ from pathlib._os import copyfileobj -__all__ = ["UnsupportedOperation"] - - -class UnsupportedOperation(NotImplementedError): - """An exception that is raised when an unsupported operation is attempted. 
- """ - pass - - @functools.cache def _is_case_sensitive(parser): return parser.normcase('Aa') == 'Aa' @@ -353,8 +344,8 @@ class PathBase(PurePathBase): This class provides dummy implementations for many methods that derived classes can override selectively; the default implementations raise - UnsupportedOperation. The most basic methods, such as stat() and open(), - directly raise UnsupportedOperation; these basic methods are called by + NotImplementedError. The most basic methods, such as stat() and open(), + directly raise NotImplementedError; these basic methods are called by other methods such as is_dir() and read_text(). The Path class derives this class to implement local filesystem paths. @@ -363,16 +354,12 @@ class PathBase(PurePathBase): """ __slots__ = () - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported" - def stat(self, *, follow_symlinks=True): """ Return the result of the stat() system call on this path, like os.stat() does. """ - raise UnsupportedOperation(self._unsupported_msg('stat()')) + raise NotImplementedError # Convenience functions for querying the stat results @@ -448,7 +435,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed to by this path and return a file object, as the built-in open() function does. """ - raise UnsupportedOperation(self._unsupported_msg('open()')) + raise NotImplementedError def read_bytes(self): """ @@ -498,7 +485,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - raise UnsupportedOperation(self._unsupported_msg('iterdir()')) + raise NotImplementedError def _glob_selector(self, parts, case_sensitive, recurse_symlinks): if case_sensitive is None: @@ -575,14 +562,14 @@ def readlink(self): """ Return the path to which the symbolic link points. """ - raise UnsupportedOperation(self._unsupported_msg('readlink()')) + raise NotImplementedError def symlink_to(self, target, target_is_directory=False): """ Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - raise UnsupportedOperation(self._unsupported_msg('symlink_to()')) + raise NotImplementedError def _symlink_to_target_of(self, link): """ @@ -595,7 +582,7 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ - raise UnsupportedOperation(self._unsupported_msg('mkdir()')) + raise NotImplementedError # Metadata keys supported by this path type. _readable_metadata = _writable_metadata = frozenset() @@ -604,13 +591,13 @@ def _read_metadata(self, keys=None, *, follow_symlinks=True): """ Returns path metadata as a dict with string keys. """ - raise UnsupportedOperation(self._unsupported_msg('_read_metadata()')) + raise NotImplementedError def _write_metadata(self, metadata, *, follow_symlinks=True): """ Sets path metadata from the given dict with string keys. """ - raise UnsupportedOperation(self._unsupported_msg('_write_metadata()')) + raise NotImplementedError def _copy_metadata(self, target, *, follow_symlinks=True): """ @@ -687,7 +674,7 @@ def _delete(self): """ Delete this file or directory (including all sub-directories). 
""" - raise UnsupportedOperation(self._unsupported_msg('_delete()')) + raise NotImplementedError def move(self, target): """ diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 0dfe9d2390ecff..b933dd512eeb28 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -21,15 +21,22 @@ from pathlib._os import (copyfile, file_metadata_keys, read_file_metadata, write_file_metadata) -from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._abc import PurePathBase, PathBase __all__ = [ + "UnsupportedOperation", "PurePath", "PurePosixPath", "PureWindowsPath", "Path", "PosixPath", "WindowsPath", ] +class UnsupportedOperation(NotImplementedError): + """An exception that is raised when an unsupported operation is attempted. + """ + pass + + class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" @@ -527,10 +534,6 @@ class Path(PathBase, PurePath): """ __slots__ = () - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported on this system" - def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath @@ -817,7 +820,8 @@ def owner(self, *, follow_symlinks=True): """ Return the login name of the file owner. """ - raise UnsupportedOperation(self._unsupported_msg('owner()')) + f = f"{type(self).__name__}.owner()" + raise UnsupportedOperation(f"{f} is unsupported on this system") if grp: def group(self, *, follow_symlinks=True): @@ -831,7 +835,8 @@ def group(self, *, follow_symlinks=True): """ Return the group name of the file gid. """ - raise UnsupportedOperation(self._unsupported_msg('group()')) + f = f"{type(self).__name__}.group()" + raise UnsupportedOperation(f"{f} is unsupported on this system") if hasattr(os, "readlink"): def readlink(self): @@ -839,6 +844,13 @@ def readlink(self): Return the path to which the symbolic link points. """ return self.with_segments(os.readlink(self)) + else: + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + f = f"{type(self).__name__}.readlink()" + raise UnsupportedOperation(f"{f} is unsupported on this system") def touch(self, mode=0o666, exist_ok=True): """ @@ -989,6 +1001,14 @@ def symlink_to(self, target, target_is_directory=False): Note the order of arguments (link, target) is the reverse of os.symlink. """ os.symlink(target, self, target_is_directory) + else: + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + f = f"{type(self).__name__}.symlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") if os.name == 'nt': def _symlink_to_target_of(self, link): @@ -1013,7 +1033,8 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. 
""" - raise UnsupportedOperation(self._unsupported_msg('hardlink_to()')) + f = f"{type(self).__name__}.hardlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") def expanduser(self): """ Return a new path with expanded ~ and ~user constructs diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b57ef420bfcbcd..68bff2cf0d511e 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -63,6 +63,14 @@ def needs_symlinks(fn): _tests_needing_symlinks.add(fn.__name__) return fn + + +class UnsupportedOperationTest(unittest.TestCase): + def test_is_notimplemented(self): + self.assertTrue(issubclass(pathlib.UnsupportedOperation, NotImplementedError)) + self.assertTrue(isinstance(pathlib.UnsupportedOperation(), NotImplementedError)) + + # # Tests for the pure classes. # diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index d770b87dc6a104..e230dd188799a5 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -5,7 +5,7 @@ import stat import unittest -from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._abc import PurePathBase, PathBase from pathlib._types import Parser import posixpath @@ -27,11 +27,6 @@ def needs_windows(fn): return fn -class UnsupportedOperationTest(unittest.TestCase): - def test_is_notimplemented(self): - self.assertTrue(issubclass(UnsupportedOperation, NotImplementedError)) - self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError)) - # # Tests for the pure classes. # @@ -1294,10 +1289,9 @@ def test_is_absolute_windows(self): class PathBaseTest(PurePathBaseTest): cls = PathBase - def test_unsupported_operation(self): - P = self.cls + def test_not_implemented_error(self): p = self.cls('') - e = UnsupportedOperation + e = NotImplementedError self.assertRaises(e, p.stat) self.assertRaises(e, p.exists) self.assertRaises(e, p.is_dir) From f8dcb8200626a1a06c4a26d8129257f42658a9ff Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 12 Dec 2024 17:59:13 +0000 Subject: [PATCH 29/46] gh-127879: Fix data race in `_PyFreeList_Push` (#127880) Writes to the `ob_tid` field need to use atomics because it may be concurrently read by a non-locking dictionary, list, or structmember read. --- Include/internal/pycore_freelist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index da2d7bf6ae1393..84a5ab30f3eeea 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -51,7 +51,7 @@ static inline int _PyFreeList_Push(struct _Py_freelist *fl, void *obj, Py_ssize_t maxsize) { if (fl->size < maxsize && fl->size >= 0) { - *(void **)obj = fl->freelist; + FT_ATOMIC_STORE_PTR_RELAXED(*(void **)obj, fl->freelist); fl->freelist = obj; fl->size++; OBJECT_STAT_INC(to_freelist); From f823910bbd4bf01ec3e1ab7b3cb1d77815138296 Mon Sep 17 00:00:00 2001 From: velemas <10437413+velemas@users.noreply.github.com> Date: Thu, 12 Dec 2024 20:07:55 +0200 Subject: [PATCH 30/46] gh-127865: Fix build failure for systems without thread local support (GH-127866) This PR fixes the build issue introduced by the commit 628f6eb from GH-112207 on systems without thread local support. 
--- .../Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst | 1 + Python/import.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst diff --git a/Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst b/Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst new file mode 100644 index 00000000000000..3fc1d8a1b51d30 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-12-12-17-21-45.gh-issue-127865.30GDzs.rst @@ -0,0 +1 @@ +Fix build failure on systems without thread-locals support. diff --git a/Python/import.c b/Python/import.c index b3c384c27718ce..f3511aaf7b8010 100644 --- a/Python/import.c +++ b/Python/import.c @@ -749,7 +749,7 @@ const char * _PyImport_ResolveNameWithPackageContext(const char *name) { #ifndef HAVE_THREAD_LOCAL - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); + PyMutex_Lock(&EXTENSIONS.mutex); #endif if (PKGCONTEXT != NULL) { const char *p = strrchr(PKGCONTEXT, '.'); @@ -759,7 +759,7 @@ _PyImport_ResolveNameWithPackageContext(const char *name) } } #ifndef HAVE_THREAD_LOCAL - PyThread_release_lock(EXTENSIONS.mutex); + PyMutex_Unlock(&EXTENSIONS.mutex); #endif return name; } @@ -768,12 +768,12 @@ const char * _PyImport_SwapPackageContext(const char *newcontext) { #ifndef HAVE_THREAD_LOCAL - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); + PyMutex_Lock(&EXTENSIONS.mutex); #endif const char *oldcontext = PKGCONTEXT; PKGCONTEXT = newcontext; #ifndef HAVE_THREAD_LOCAL - PyThread_release_lock(EXTENSIONS.mutex); + PyMutex_Unlock(&EXTENSIONS.mutex); #endif return oldcontext; } From 365451e28368db46ae89a3a990d85c10c2284aa2 Mon Sep 17 00:00:00 2001 From: Andrey Efremov Date: Fri, 13 Dec 2024 03:17:39 +0700 Subject: [PATCH 31/46] gh-127353: Allow to force color output on Windows (#127354) --- Lib/_colorize.py | 17 +++++---- Lib/test/test__colorize.py | 37 +++++++++++++++++++ ...-11-28-15-55-48.gh-issue-127353.i-XOXg.rst | 2 + 3 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 845fb57a90abb8..709081e25ec59b 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -32,14 +32,6 @@ def get_colors(colorize: bool = False) -> ANSIColors: def can_colorize() -> bool: - if sys.platform == "win32": - try: - import nt - - if not nt._supports_virtual_terminal(): - return False - except (ImportError, AttributeError): - return False if not sys.flags.ignore_environment: if os.environ.get("PYTHON_COLORS") == "0": return False @@ -58,6 +50,15 @@ def can_colorize() -> bool: if not hasattr(sys.stderr, "fileno"): return False + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False + try: return os.isatty(sys.stderr.fileno()) except io.UnsupportedOperation: diff --git a/Lib/test/test__colorize.py b/Lib/test/test__colorize.py index d55b97ade68cef..7a65d63f49eed7 100644 --- a/Lib/test/test__colorize.py +++ b/Lib/test/test__colorize.py @@ -50,10 +50,47 @@ def test_colorized_detection_checks_for_environment_variables(self): with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {}): + self.assertEqual(_colorize.can_colorize(), True) + isatty_mock.return_value = False with unittest.mock.patch("os.environ", {}): 
self.assertEqual(_colorize.can_colorize(), False) + @force_not_colorized + @unittest.skipUnless(sys.platform == "win32", "Windows only") + def test_colorized_detection_checks_for_environment_variables_no_vt(self): + with (unittest.mock.patch("nt._supports_virtual_terminal", return_value=False), + unittest.mock.patch("os.isatty") as isatty_mock, + unittest.mock.patch("sys.flags", unittest.mock.MagicMock(ignore_environment=False)), + unittest.mock.patch("_colorize.can_colorize", ORIGINAL_CAN_COLORIZE)): + isatty_mock.return_value = True + with unittest.mock.patch("os.environ", {'TERM': 'dumb'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '1'}): + self.assertEqual(_colorize.can_colorize(), True) + with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '0'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {'NO_COLOR': '1'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", + {'NO_COLOR': '1', "PYTHON_COLORS": '1'}): + self.assertEqual(_colorize.can_colorize(), True) + with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1'}): + self.assertEqual(_colorize.can_colorize(), True) + with unittest.mock.patch("os.environ", + {'FORCE_COLOR': '1', 'NO_COLOR': '1'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", + {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): + self.assertEqual(_colorize.can_colorize(), False) + with unittest.mock.patch("os.environ", {}): + self.assertEqual(_colorize.can_colorize(), False) + + isatty_mock.return_value = False + with unittest.mock.patch("os.environ", {}): + self.assertEqual(_colorize.can_colorize(), False) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst b/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst new file mode 100644 index 00000000000000..88661b9a611071 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst @@ -0,0 +1,2 @@ +Allow to force color output on Windows using environment variables. Patch by +Andrey Efremov. From ed037d229f64db90aea00f397e9ce1b2f4a22d3f Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 12 Dec 2024 20:27:29 +0000 Subject: [PATCH 32/46] Fix typos in `Lib/_pydecimal.py` (#127700) --- Lib/_pydecimal.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 5b60570c6c592a..ec036199331396 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -97,7 +97,7 @@ class DecimalException(ArithmeticError): Used exceptions derive from this. If an exception derives from another exception besides this (such as - Underflow (Inexact, Rounded, Subnormal) that indicates that it is only + Underflow (Inexact, Rounded, Subnormal)) that indicates that it is only called if the others are present. This isn't actually used for anything, though. @@ -145,7 +145,7 @@ class InvalidOperation(DecimalException): x ** (+-)INF An operand is invalid - The result of the operation after these is a quiet positive NaN, + The result of the operation after this is a quiet positive NaN, except when the cause is a signaling NaN, in which case the result is also a quiet NaN, but with the original sign, and an optional diagnostic information. 
From a8ffe661548e16ad02dbe6cb8a89513d7ed2a42c Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 12 Dec 2024 23:11:20 +0200 Subject: [PATCH 33/46] Revert "gh-127353: Allow to force color output on Windows (#127354)" (#127889) This reverts commit 365451e28368db46ae89a3a990d85c10c2284aa2. --- Lib/_colorize.py | 17 ++++----- Lib/test/test__colorize.py | 37 ------------------- ...-11-28-15-55-48.gh-issue-127353.i-XOXg.rst | 2 - 3 files changed, 8 insertions(+), 48 deletions(-) delete mode 100644 Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 709081e25ec59b..845fb57a90abb8 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -32,6 +32,14 @@ def get_colors(colorize: bool = False) -> ANSIColors: def can_colorize() -> bool: + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False if not sys.flags.ignore_environment: if os.environ.get("PYTHON_COLORS") == "0": return False @@ -50,15 +58,6 @@ def can_colorize() -> bool: if not hasattr(sys.stderr, "fileno"): return False - if sys.platform == "win32": - try: - import nt - - if not nt._supports_virtual_terminal(): - return False - except (ImportError, AttributeError): - return False - try: return os.isatty(sys.stderr.fileno()) except io.UnsupportedOperation: diff --git a/Lib/test/test__colorize.py b/Lib/test/test__colorize.py index 7a65d63f49eed7..d55b97ade68cef 100644 --- a/Lib/test/test__colorize.py +++ b/Lib/test/test__colorize.py @@ -50,47 +50,10 @@ def test_colorized_detection_checks_for_environment_variables(self): with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", {}): - self.assertEqual(_colorize.can_colorize(), True) - isatty_mock.return_value = False with unittest.mock.patch("os.environ", {}): self.assertEqual(_colorize.can_colorize(), False) - @force_not_colorized - @unittest.skipUnless(sys.platform == "win32", "Windows only") - def test_colorized_detection_checks_for_environment_variables_no_vt(self): - with (unittest.mock.patch("nt._supports_virtual_terminal", return_value=False), - unittest.mock.patch("os.isatty") as isatty_mock, - unittest.mock.patch("sys.flags", unittest.mock.MagicMock(ignore_environment=False)), - unittest.mock.patch("_colorize.can_colorize", ORIGINAL_CAN_COLORIZE)): - isatty_mock.return_value = True - with unittest.mock.patch("os.environ", {'TERM': 'dumb'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '1'}): - self.assertEqual(_colorize.can_colorize(), True) - with unittest.mock.patch("os.environ", {'PYTHON_COLORS': '0'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", {'NO_COLOR': '1'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", - {'NO_COLOR': '1', "PYTHON_COLORS": '1'}): - self.assertEqual(_colorize.can_colorize(), True) - with unittest.mock.patch("os.environ", {'FORCE_COLOR': '1'}): - self.assertEqual(_colorize.can_colorize(), True) - with unittest.mock.patch("os.environ", - {'FORCE_COLOR': '1', 'NO_COLOR': '1'}): - self.assertEqual(_colorize.can_colorize(), False) - with unittest.mock.patch("os.environ", - {'FORCE_COLOR': '1', "PYTHON_COLORS": '0'}): - self.assertEqual(_colorize.can_colorize(), 
False) - with unittest.mock.patch("os.environ", {}): - self.assertEqual(_colorize.can_colorize(), False) - - isatty_mock.return_value = False - with unittest.mock.patch("os.environ", {}): - self.assertEqual(_colorize.can_colorize(), False) - if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst b/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst deleted file mode 100644 index 88661b9a611071..00000000000000 --- a/Misc/NEWS.d/next/Windows/2024-11-28-15-55-48.gh-issue-127353.i-XOXg.rst +++ /dev/null @@ -1,2 +0,0 @@ -Allow to force color output on Windows using environment variables. Patch by -Andrey Efremov. From 8ac307f0d6834148471d2e12a45bf022e659164c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns=20=F0=9F=87=B5=F0=9F=87=B8?= Date: Thu, 12 Dec 2024 21:41:46 +0000 Subject: [PATCH 34/46] GH-127724: don't use sysconfig to calculate the venv local include path (#127731) --- Lib/venv/__init__.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index ca1af84e6705fe..dc4c9ef3531991 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -103,8 +103,6 @@ def _venv_path(self, env_dir, name): vars = { 'base': env_dir, 'platbase': env_dir, - 'installed_base': env_dir, - 'installed_platbase': env_dir, } return sysconfig.get_path(name, scheme='venv', vars=vars) @@ -175,9 +173,20 @@ def create_if_needed(d): context.python_dir = dirname context.python_exe = exename binpath = self._venv_path(env_dir, 'scripts') - incpath = self._venv_path(env_dir, 'include') libpath = self._venv_path(env_dir, 'purelib') + # PEP 405 says venvs should create a local include directory. + # See https://peps.python.org/pep-0405/#include-files + # XXX: This directory is not exposed in sysconfig or anywhere else, and + # doesn't seem to be utilized by modern packaging tools. We keep it + # for backwards-compatibility, and to follow the PEP, but I would + # recommend against using it, as most tooling does not pass it to + # compilers. Instead, until we standardize a site-specific include + # directory, I would recommend installing headers as package data, + # and providing some sort of API to get the include directories. + # Example: https://numpy.org/doc/2.1/reference/generated/numpy.get_include.html + incpath = os.path.join(env_dir, 'Include' if os.name == 'nt' else 'include') + context.inc_path = incpath create_if_needed(incpath) context.lib_path = libpath From 0cbc19d59e409854f2b9bdda75e1af2b6cd89ac2 Mon Sep 17 00:00:00 2001 From: Daniel Haag <121057143+denialhaag@users.noreply.github.com> Date: Thu, 12 Dec 2024 22:43:44 +0100 Subject: [PATCH 35/46] Fix typo in traceback docs (#127884) --- Doc/library/traceback.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 4899ed64ebad8d..b0ee3fc56ad735 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -274,7 +274,7 @@ Module-Level Functions :class:`!TracebackException` objects are created from actual exceptions to capture data for later printing. They offer a more lightweight method of storing this information by avoiding holding references to -:ref:`traceback` and :ref:`frame` objects +:ref:`traceback` and :ref:`frame` objects. In addition, they expose more options to configure the output compared to the module-level functions described above. 
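The wording fix in #127884 above concerns :class:`traceback.TracebackException`, which captures a printable summary without keeping traceback and frame objects alive. A minimal usage sketch of that standard-library API, shown only for illustration::

    import traceback

    try:
        1 / 0
    except ZeroDivisionError as exc:
        # Capture a formatted summary without holding the frame objects.
        summary = traceback.TracebackException.from_exception(exc)

    print("".join(summary.format()))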
From ba2d2fda93a03a91ac6cdff319fd23ef51848d51 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 13 Dec 2024 05:49:02 +0800 Subject: [PATCH 36/46] gh-127845: Minor improvements to iOS test runner script (#127846) Uses symlinks to install iOS framework into testbed clone, adds a verbose mode to the iOS runner to hide most Xcode output, adds another mechanism to disable terminal colors, and ensures that stdout is flushed after every write. --- Makefile.pre.in | 2 +- iOS/testbed/__main__.py | 66 ++++++++++++++----- iOS/testbed/iOSTestbedTests/iOSTestbedTests.m | 5 +- 3 files changed, 53 insertions(+), 20 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index 7b66802147dc3a..3e880f7800fccf 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2169,7 +2169,7 @@ testios: $(PYTHON_FOR_BUILD) $(srcdir)/iOS/testbed clone --framework $(PYTHONFRAMEWORKPREFIX) "$(XCFOLDER)" # Run the testbed project - $(PYTHON_FOR_BUILD) "$(XCFOLDER)" run -- test -uall --single-process --rerun -W + $(PYTHON_FOR_BUILD) "$(XCFOLDER)" run --verbose -- test -uall --single-process --rerun -W # Like test, but using --slow-ci which enables all test resources and use # longer timeout. Run an optional pybuildbot.identify script to include diff --git a/iOS/testbed/__main__.py b/iOS/testbed/__main__.py index 22570ee0f3ed04..068272835a5b95 100644 --- a/iOS/testbed/__main__.py +++ b/iOS/testbed/__main__.py @@ -141,10 +141,12 @@ async def log_stream_task(initial_devices): else: suppress_dupes = False sys.stdout.write(line) + sys.stdout.flush() -async def xcode_test(location, simulator): +async def xcode_test(location, simulator, verbose): # Run the test suite on the named simulator + print("Starting xcodebuild...") args = [ "xcodebuild", "test", @@ -159,6 +161,9 @@ async def xcode_test(location, simulator): "-derivedDataPath", str(location / "DerivedData"), ] + if not verbose: + args += ["-quiet"] + async with async_process( *args, stdout=subprocess.PIPE, @@ -166,6 +171,7 @@ async def xcode_test(location, simulator): ) as process: while line := (await process.stdout.readline()).decode(*DECODE_ARGS): sys.stdout.write(line) + sys.stdout.flush() status = await asyncio.wait_for(process.wait(), timeout=1) exit(status) @@ -182,7 +188,9 @@ def clone_testbed( sys.exit(10) if framework is None: - if not (source / "Python.xcframework/ios-arm64_x86_64-simulator/bin").is_dir(): + if not ( + source / "Python.xcframework/ios-arm64_x86_64-simulator/bin" + ).is_dir(): print( f"The testbed being cloned ({source}) does not contain " f"a simulator framework. 
Re-run with --framework" @@ -202,33 +210,48 @@ def clone_testbed( ) sys.exit(13) - print("Cloning testbed project...") - shutil.copytree(source, target) + print("Cloning testbed project:") + print(f" Cloning {source}...", end="", flush=True) + shutil.copytree(source, target, symlinks=True) + print(" done") if framework is not None: if framework.suffix == ".xcframework": - print("Installing XCFramework...") - xc_framework_path = target / "Python.xcframework" - shutil.rmtree(xc_framework_path) - shutil.copytree(framework, xc_framework_path) + print(" Installing XCFramework...", end="", flush=True) + xc_framework_path = (target / "Python.xcframework").resolve() + if xc_framework_path.is_dir(): + shutil.rmtree(xc_framework_path) + else: + xc_framework_path.unlink() + xc_framework_path.symlink_to( + framework.relative_to(xc_framework_path.parent, walk_up=True) + ) + print(" done") else: - print("Installing simulator Framework...") + print(" Installing simulator framework...", end="", flush=True) sim_framework_path = ( target / "Python.xcframework" / "ios-arm64_x86_64-simulator" + ).resolve() + if sim_framework_path.is_dir(): + shutil.rmtree(sim_framework_path) + else: + sim_framework_path.unlink() + sim_framework_path.symlink_to( + framework.relative_to(sim_framework_path.parent, walk_up=True) ) - shutil.rmtree(sim_framework_path) - shutil.copytree(framework, sim_framework_path) + print(" done") else: - print("Using pre-existing iOS framework.") + print(" Using pre-existing iOS framework.") for app_src in apps: - print(f"Installing app {app_src.name!r}...") + print(f" Installing app {app_src.name!r}...", end="", flush=True) app_target = target / f"iOSTestbed/app/{app_src.name}" if app_target.is_dir(): shutil.rmtree(app_target) shutil.copytree(app_src, app_target) + print(" done") - print(f"Testbed project created in {target}") + print(f"Successfully cloned testbed: {target.resolve()}") def update_plist(testbed_path, args): @@ -243,10 +266,11 @@ def update_plist(testbed_path, args): plistlib.dump(info, f) -async def run_testbed(simulator: str, args: list[str]): +async def run_testbed(simulator: str, args: list[str], verbose: bool=False): location = Path(__file__).parent - print("Updating plist...") + print("Updating plist...", end="", flush=True) update_plist(location, args) + print(" done.") # Get the list of devices that are booted at the start of the test run. 
# The simulator started by the test suite will be detected as the new @@ -256,7 +280,7 @@ async def run_testbed(simulator: str, args: list[str]): try: async with asyncio.TaskGroup() as tg: tg.create_task(log_stream_task(initial_devices)) - tg.create_task(xcode_test(location, simulator)) + tg.create_task(xcode_test(location, simulator=simulator, verbose=verbose)) except* MySystemExit as e: raise SystemExit(*e.exceptions[0].args) from None except* subprocess.CalledProcessError as e: @@ -315,6 +339,11 @@ def main(): default="iPhone SE (3rd Generation)", help="The name of the simulator to use (default: 'iPhone SE (3rd Generation)')", ) + run.add_argument( + "-v", "--verbose", + action="store_true", + help="Enable verbose output", + ) try: pos = sys.argv.index("--") @@ -330,7 +359,7 @@ def main(): clone_testbed( source=Path(__file__).parent, target=Path(context.location), - framework=Path(context.framework) if context.framework else None, + framework=Path(context.framework).resolve() if context.framework else None, apps=[Path(app) for app in context.apps], ) elif context.subcommand == "run": @@ -348,6 +377,7 @@ def main(): asyncio.run( run_testbed( simulator=context.simulator, + verbose=context.verbose, args=test_args, ) ) diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m index ac78456a61e65e..6db38253396c8d 100644 --- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m +++ b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m @@ -24,8 +24,11 @@ - (void)testPython { NSString *resourcePath = [[NSBundle mainBundle] resourcePath]; - // Disable all color, as the Xcode log can't display color + // Set some other common environment indicators to disable color, as the + // Xcode log can't display color. Stdout will report that it is *not* a + // TTY. setenv("NO_COLOR", "1", true); + setenv("PY_COLORS", "0", true); // Arguments to pass into the test suite runner. // argv[0] must identify the process; any subsequent arg From 58942a07df8811afba9c58dc16c1aab244ccf27a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= Date: Fri, 13 Dec 2024 10:26:22 +0100 Subject: [PATCH 37/46] Document PyObject_SelfIter (#127861) --- Doc/c-api/object.rst | 6 ++++++ Doc/data/refcounts.dat | 3 +++ 2 files changed, 9 insertions(+) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 1ae3c46bea46ea..f97ade01e67850 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -509,6 +509,12 @@ Object Protocol iterated. +.. c:function:: PyObject* PyObject_SelfIter(PyObject *obj) + + This is equivalent to the Python ``__iter__(self): return self`` method. + It is intended for :term:`iterator` types, to be used in the :c:member:`PyTypeObject.tp_iter` slot. + + .. c:function:: PyObject* PyObject_GetAIter(PyObject *o) This is the equivalent to the Python expression ``aiter(o)``. 
Takes an diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 3f49c88c3cc028..a043af48ba7a05 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1849,6 +1849,9 @@ PyObject_RichCompareBool:PyObject*:o1:0: PyObject_RichCompareBool:PyObject*:o2:0: PyObject_RichCompareBool:int:opid:: +PyObject_SelfIter:PyObject*::+1: +PyObject_SelfIter:PyObject*:obj:0: + PyObject_SetAttr:int::: PyObject_SetAttr:PyObject*:o:0: PyObject_SetAttr:PyObject*:attr_name:0: From 11ff3286b7e821bf439bc7caa0fa712e3bc3846a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20K=C3=A1lm=C3=A1n?= Date: Fri, 13 Dec 2024 10:27:02 +0100 Subject: [PATCH 38/46] link to the correct output method in documentation (#127857) --- Doc/library/http.cookies.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/http.cookies.rst b/Doc/library/http.cookies.rst index 4ce2e3c4f4cb42..ad37a0fca4742d 100644 --- a/Doc/library/http.cookies.rst +++ b/Doc/library/http.cookies.rst @@ -98,7 +98,7 @@ Cookie Objects .. method:: BaseCookie.output(attrs=None, header='Set-Cookie:', sep='\r\n') Return a string representation suitable to be sent as HTTP headers. *attrs* and - *header* are sent to each :class:`Morsel`'s :meth:`output` method. *sep* is used + *header* are sent to each :class:`Morsel`'s :meth:`~Morsel.output` method. *sep* is used to join the headers together, and is by default the combination ``'\r\n'`` (CRLF). From 9b4bbf4401291636e5db90511a0548fffb23a505 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 13 Dec 2024 09:54:59 +0000 Subject: [PATCH 39/46] GH-125174: Don't use `UINT32_MAX` in header file (GH-127863) --- Include/refcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/refcount.h b/Include/refcount.h index 6908c426141378..d98b2dfcf37202 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -377,7 +377,7 @@ static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) #if SIZEOF_VOID_P > 4 /* If an object has been freed, it will have a negative full refcnt * If it has not it been freed, will have a very large refcnt */ - if (op->ob_refcnt_full <= 0 || op->ob_refcnt > (UINT32_MAX - (1<<20))) { + if (op->ob_refcnt_full <= 0 || op->ob_refcnt > (((PY_UINT32_T)-1) - (1<<20))) { #else if (op->ob_refcnt <= 0) { #endif From 5fc6bb2754a25157575efc0b37da78c629fea46e Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 13 Dec 2024 11:06:26 +0100 Subject: [PATCH 40/46] gh-126868: Add freelist for compact int objects (GH-126865) --- Include/internal/pycore_freelist_state.h | 2 + Include/internal/pycore_long.h | 2 + ...-11-16-22-37-46.gh-issue-126868.yOoHSY.rst | 1 + Objects/longobject.c | 78 ++++++++++++++----- Objects/object.c | 1 + Python/bytecodes.c | 25 +++--- Python/executor_cases.c.h | 24 +++--- Python/generated_cases.c.h | 24 +++--- 8 files changed, 102 insertions(+), 55 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 4e04cf431e0b31..a1a94c1f2dc880 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -14,6 +14,7 @@ extern "C" { # define Py_dicts_MAXFREELIST 80 # define Py_dictkeys_MAXFREELIST 80 # define Py_floats_MAXFREELIST 100 +# define Py_ints_MAXFREELIST 100 # define Py_slices_MAXFREELIST 1 # define Py_contexts_MAXFREELIST 255 # define Py_async_gens_MAXFREELIST 80 @@ -35,6 +36,7 @@ struct _Py_freelist { 
struct _Py_freelists { struct _Py_freelist floats; + struct _Py_freelist ints; struct _Py_freelist tuples[PyTuple_MAXSAVESIZE]; struct _Py_freelist lists; struct _Py_freelist dicts; diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 196b4152280a35..8bead00e70640c 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -55,6 +55,8 @@ extern void _PyLong_FiniTypes(PyInterpreterState *interp); /* other API */ +PyAPI_FUNC(void) _PyLong_ExactDealloc(PyObject *self); + #define _PyLong_SMALL_INTS _Py_SINGLETON(small_ints) // _PyLong_GetZero() and _PyLong_GetOne() must always be available diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst new file mode 100644 index 00000000000000..fd1570908c1fd6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-16-22-37-46.gh-issue-126868.yOoHSY.rst @@ -0,0 +1 @@ +Increase performance of :class:`int` by adding a freelist for compact ints. diff --git a/Objects/longobject.c b/Objects/longobject.c index 4aa35685b509f2..96d59f542a7c3c 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6,6 +6,7 @@ #include "pycore_bitutils.h" // _Py_popcount32() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_call.h" // _PyObject_MakeTpCall +#include "pycore_freelist.h" // _Py_FREELIST_FREE, _Py_FREELIST_POP #include "pycore_long.h" // _Py_SmallInts #include "pycore_object.h" // _PyObject_Init() #include "pycore_runtime.h" // _PY_NSMALLPOSINTS @@ -42,7 +43,7 @@ static inline void _Py_DECREF_INT(PyLongObject *op) { assert(PyLong_CheckExact(op)); - _Py_DECREF_SPECIALIZED((PyObject *)op, (destructor)PyObject_Free); + _Py_DECREF_SPECIALIZED((PyObject *)op, _PyLong_ExactDealloc); } static inline int @@ -220,15 +221,18 @@ _PyLong_FromMedium(sdigit x) { assert(!IS_SMALL_INT(x)); assert(is_medium_int(x)); - /* We could use a freelist here */ - PyLongObject *v = PyObject_Malloc(sizeof(PyLongObject)); + + PyLongObject *v = (PyLongObject *)_Py_FREELIST_POP(PyLongObject, ints); if (v == NULL) { - PyErr_NoMemory(); - return NULL; + v = PyObject_Malloc(sizeof(PyLongObject)); + if (v == NULL) { + PyErr_NoMemory(); + return NULL; + } + _PyObject_Init((PyObject*)v, &PyLong_Type); } digit abs_x = x < 0 ? 
-x : x; _PyLong_SetSignAndDigitCount(v, x<0?-1:1, 1); - _PyObject_Init((PyObject*)v, &PyLong_Type); v->long_value.ob_digit[0] = abs_x; return (PyObject*)v; } @@ -3611,24 +3615,60 @@ long_richcompare(PyObject *self, PyObject *other, int op) Py_RETURN_RICHCOMPARE(result, 0, op); } +static inline int +compact_int_is_small(PyObject *self) +{ + PyLongObject *pylong = (PyLongObject *)self; + assert(_PyLong_IsCompact(pylong)); + stwodigits ival = medium_value(pylong); + if (IS_SMALL_INT(ival)) { + PyLongObject *small_pylong = (PyLongObject *)get_small_int((sdigit)ival); + if (pylong == small_pylong) { + return 1; + } + } + return 0; +} + +void +_PyLong_ExactDealloc(PyObject *self) +{ + assert(PyLong_CheckExact(self)); + if (_PyLong_IsCompact((PyLongObject *)self)) { + #ifndef Py_GIL_DISABLED + if (compact_int_is_small(self)) { + // See PEP 683, section Accidental De-Immortalizing for details + _Py_SetImmortal(self); + return; + } + #endif + _Py_FREELIST_FREE(ints, self, PyObject_Free); + return; + } + PyObject_Free(self); +} + static void long_dealloc(PyObject *self) { - /* This should never get called, but we also don't want to SEGV if - * we accidentally decref small Ints out of existence. Instead, - * since small Ints are immortal, re-set the reference count. - */ - PyLongObject *pylong = (PyLongObject*)self; - if (pylong && _PyLong_IsCompact(pylong)) { - stwodigits ival = medium_value(pylong); - if (IS_SMALL_INT(ival)) { - PyLongObject *small_pylong = (PyLongObject *)get_small_int((sdigit)ival); - if (pylong == small_pylong) { - _Py_SetImmortal(self); - return; - } + assert(self); + if (_PyLong_IsCompact((PyLongObject *)self)) { + if (compact_int_is_small(self)) { + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref small Ints out of existence. Instead, + * since small Ints are immortal, re-set the reference count. 
+ * + * See PEP 683, section Accidental De-Immortalizing for details + */ + _Py_SetImmortal(self); + return; + } + if (PyLong_CheckExact(self)) { + _Py_FREELIST_FREE(ints, self, PyObject_Free); + return; } } + Py_TYPE(self)->tp_free(self); } diff --git a/Objects/object.c b/Objects/object.c index c64675b5e1d6c2..d584414c559b9d 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -936,6 +936,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); + clear_freelist(&freelists->ints, is_finalization, free_object); } /* diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d0e4c2bc45489b..f0eb5405faeff5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -26,6 +26,7 @@ #include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_range.h" // _PyRangeIterObject +#include "pycore_long.h" // _PyLong_ExactDealloc() #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_sliceobject.h" // _PyBuildSlice_ConsumeRefs #include "pycore_tuple.h" // _PyTuple_ITEMS() @@ -514,8 +515,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -527,8 +528,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -540,8 +541,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -801,7 +802,7 @@ dummy_func( assert(res_o != NULL); Py_INCREF(res_o); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -821,7 +822,7 @@ dummy_func( DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -842,7 +843,7 @@ dummy_func( PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, 
(destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -959,7 +960,7 @@ dummy_func( assert(old_value != NULL); UNLOCK_OBJECT(list); // unlock before decrefs! Py_DECREF(old_value); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); DEAD(sub_st); PyStackRef_CLOSE(list_st); } @@ -2476,9 +2477,9 @@ dummy_func( Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); DEAD(left); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); DEAD(right); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 18f19773d25c90..19ba67a8af6769 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -626,8 +626,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -646,8 +646,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -666,8 +666,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1000,7 +1000,7 @@ assert(res_o != NULL); Py_INCREF(res_o); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1042,7 +1042,7 @@ } STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1081,7 +1081,7 @@ PyObject 
*res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1264,7 +1264,7 @@ assert(old_value != NULL); UNLOCK_OBJECT(list); // unlock before decrefs! Py_DECREF(old_value); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); @@ -3075,8 +3075,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. stack_pointer[-2] = res; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fc0f55555f5c36..51227c9868b8cc 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -118,8 +118,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -285,8 +285,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -356,8 +356,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -590,7 +590,7 @@ assert(res_o != NULL); Py_INCREF(res_o); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -622,7 +622,7 @@ DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c, BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + 
PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -654,7 +654,7 @@ PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -3333,8 +3333,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. } @@ -7721,7 +7721,7 @@ assert(old_value != NULL); UNLOCK_OBJECT(list); // unlock before decrefs! Py_DECREF(old_value); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); From e62e1ca4553dbcf9d7f89be24bebcbd9213f9ae5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 13 Dec 2024 11:00:00 +0000 Subject: [PATCH 41/46] GH-126833: Dumps graphviz representation of executor graph. (GH-126880) --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_optimizer.h | 5 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 + Python/ceval.c | 1 + Python/clinic/sysmodule.c.h | 58 +++++++- Python/optimizer.c | 136 +++++++++++++++++- Python/sysmodule.c | 25 ++++ 9 files changed, 230 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index c12e242d560bde..90214a314031d1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1129,6 +1129,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(origin)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(out_fd)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(outgoing)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(outpath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index dfd9f2b799ec8e..97a75d0c46c867 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -618,6 +618,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(origin) STRUCT_FOR_ID(out_fd) STRUCT_FOR_ID(outgoing) + STRUCT_FOR_ID(outpath) STRUCT_FOR_ID(overlapped) STRUCT_FOR_ID(owner) STRUCT_FOR_ID(pages) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 6d70b42f708854..bc7cfcde613d65 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -60,6 +60,9 @@ typedef struct { }; uint64_t operand0; 
// A cache entry uint64_t operand1; +#ifdef Py_STATS + uint64_t execution_count; +#endif } _PyUOpInstruction; typedef struct { @@ -285,6 +288,8 @@ static inline int is_terminator(const _PyUOpInstruction *uop) ); } +PyAPI_FUNC(int) _PyDumpExecutors(FILE *out); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index b631382cae058a..4f928cc050bf8e 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1127,6 +1127,7 @@ extern "C" { INIT_ID(origin), \ INIT_ID(out_fd), \ INIT_ID(outgoing), \ + INIT_ID(outpath), \ INIT_ID(overlapped), \ INIT_ID(owner), \ INIT_ID(pages), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 24cec3a4fded7a..5b78d038fc1192 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2268,6 +2268,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(outpath); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(overlapped); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Python/ceval.c b/Python/ceval.c index 5eda033eced628..fd891d7839151e 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1095,6 +1095,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int UOP_PAIR_INC(uopcode, lastuop); #ifdef Py_STATS trace_uop_execution_counter++; + ((_PyUOpInstruction *)next_uop)[-1].execution_count++; #endif switch (uopcode) { diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 86c42ceffc5e31..cfcbd55388efa0 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1481,6 +1481,62 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) return sys_is_stack_trampoline_active_impl(module); } +PyDoc_STRVAR(sys__dump_tracelets__doc__, +"_dump_tracelets($module, /, outpath)\n" +"--\n" +"\n" +"Dump the graph of tracelets in graphviz format"); + +#define SYS__DUMP_TRACELETS_METHODDEF \ + {"_dump_tracelets", _PyCFunction_CAST(sys__dump_tracelets), METH_FASTCALL|METH_KEYWORDS, sys__dump_tracelets__doc__}, + +static PyObject * +sys__dump_tracelets_impl(PyObject *module, PyObject *outpath); + +static PyObject * +sys__dump_tracelets(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(outpath), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"outpath", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_dump_tracelets", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *outpath; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, 
kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + outpath = args[0]; + return_value = sys__dump_tracelets_impl(module, outpath); + +exit: + return return_value; +} + PyDoc_STRVAR(sys__getframemodulename__doc__, "_getframemodulename($module, /, depth=0)\n" "--\n" @@ -1668,4 +1724,4 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=6d4f6cd20419b675 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=568b0a0069dc43e8 input=a9049054013a1b77]*/ diff --git a/Python/optimizer.c b/Python/optimizer.c index 6a232218981dcd..6a4d20fad76c15 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1,6 +1,7 @@ +#include "Python.h" + #ifdef _Py_TIER2 -#include "Python.h" #include "opcode.h" #include "pycore_interp.h" #include "pycore_backoff.h" @@ -474,6 +475,9 @@ add_to_trace( trace[trace_length].target = target; trace[trace_length].oparg = oparg; trace[trace_length].operand0 = operand; +#ifdef Py_STATS + trace[trace_length].execution_count = 0; +#endif return trace_length + 1; } @@ -983,6 +987,9 @@ static void make_exit(_PyUOpInstruction *inst, int opcode, int target) inst->operand0 = 0; inst->format = UOP_FORMAT_TARGET; inst->target = target; +#ifdef Py_STATS + inst->execution_count = 0; +#endif } /* Convert implicit exits, errors and deopts @@ -1709,4 +1716,131 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp) _Py_Executors_InvalidateAll(interp, 0); } +static void +write_str(PyObject *str, FILE *out) +{ + // Encode the Unicode object to the specified encoding + PyObject *encoded_obj = PyUnicode_AsEncodedString(str, "utf8", "strict"); + if (encoded_obj == NULL) { + PyErr_Clear(); + return; + } + const char *encoded_str = PyBytes_AsString(encoded_obj); + Py_ssize_t encoded_size = PyBytes_Size(encoded_obj); + fwrite(encoded_str, 1, encoded_size, out); + Py_DECREF(encoded_obj); +} + +static int +find_line_number(PyCodeObject *code, _PyExecutorObject *executor) +{ + int code_len = (int)Py_SIZE(code); + for (int i = 0; i < code_len; i++) { + _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; + int opcode = instr->op.code; + if (opcode == ENTER_EXECUTOR) { + _PyExecutorObject *exec = code->co_executors->executors[instr->op.arg]; + if (exec == executor) { + return PyCode_Addr2Line(code, i*2); + } + } + i += _PyOpcode_Caches[_Py_GetBaseCodeUnit(code, i).op.code]; + } + return -1; +} + +/* Writes the node and outgoing edges for a single tracelet in graphviz format. + * Each tracelet is presented as a table of the uops it contains. + * If Py_STATS is enabled, execution counts are included. + * + * https://graphviz.readthedocs.io/en/stable/manual.html + * https://graphviz.org/gallery/ + */ +static void +executor_to_gv(_PyExecutorObject *executor, FILE *out) +{ + PyCodeObject *code = executor->vm_data.code; + fprintf(out, "executor_%p [\n", executor); + fprintf(out, " shape = none\n"); + + /* Write the HTML table for the uops */ + fprintf(out, " label = <\n"); + fprintf(out, " \n"); + if (code == NULL) { + fprintf(out, " \n"); + } + else { + fprintf(out, " \n", line); + } + for (uint32_t i = 0; i < executor->code_size; i++) { + /* Write row for uop. + * The `port` is a marker so that outgoing edges can + * be placed correctly. 
If a row is marked `port=17`,
+         * then the outgoing edge is `{EXEC_NAME}:17 -> {TARGET}`
+         * https://graphviz.readthedocs.io/en/stable/manual.html#node-ports-compass
+         */
+        _PyUOpInstruction const *inst = &executor->trace[i];
+        const char *opname = _PyOpcode_uop_name[inst->opcode];
+#ifdef Py_STATS
+        fprintf(out, "        <tr><td port=\"i%d\" border=\"1\" >%s -- %" PRIu64 "</td></tr>\n", i, opname, inst->execution_count);
+#else
+        fprintf(out, "        <tr><td port=\"i%d\" border=\"1\" >%s</td></tr>\n", i, opname);
+#endif
+        if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) {
+            break;
+        }
+    }
+    fprintf(out, "        </table>
>\n"); + fprintf(out, "]\n\n"); + + /* Write all the outgoing edges */ + for (uint32_t i = 0; i < executor->code_size; i++) { + _PyUOpInstruction const *inst = &executor->trace[i]; + uint16_t flags = _PyUop_Flags[inst->opcode]; + _PyExitData *exit = NULL; + if (inst->opcode == _EXIT_TRACE) { + exit = (_PyExitData *)inst->operand0; + } + else if (flags & HAS_EXIT_FLAG) { + assert(inst->format == UOP_FORMAT_JUMP); + _PyUOpInstruction const *exit_inst = &executor->trace[inst->jump_target]; + assert(exit_inst->opcode == _EXIT_TRACE); + exit = (_PyExitData *)exit_inst->operand0; + } + if (exit != NULL && exit->executor != NULL) { + fprintf(out, "executor_%p:i%d -> executor_%p:start\n", executor, i, exit->executor); + } + if (inst->opcode == _EXIT_TRACE || inst->opcode == _JUMP_TO_TOP) { + break; + } + } +} + +/* Write the graph of all the live tracelets in graphviz format. */ +int +_PyDumpExecutors(FILE *out) +{ + fprintf(out, "digraph ideal {\n\n"); + fprintf(out, " rankdir = \"LR\"\n\n"); + PyInterpreterState *interp = PyInterpreterState_Get(); + for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { + executor_to_gv(exec, out); + exec = exec->vm_data.links.next; + } + fprintf(out, "}\n\n"); + return 0; +} + +#else + +int +_PyDumpExecutors(FILE *out) +{ + PyErr_SetString(PyExc_NotImplementedError, "No JIT available"); + return -1; +} + #endif /* _Py_TIER2 */ diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 6df297f364c5d3..d6719f9bb0af91 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2344,6 +2344,30 @@ sys_is_stack_trampoline_active_impl(PyObject *module) Py_RETURN_FALSE; } +/*[clinic input] +sys._dump_tracelets + + outpath: object + +Dump the graph of tracelets in graphviz format +[clinic start generated code]*/ + +static PyObject * +sys__dump_tracelets_impl(PyObject *module, PyObject *outpath) +/*[clinic end generated code: output=a7fe265e2bc3b674 input=5bff6880cd28ffd1]*/ +{ + FILE *out = _Py_fopen_obj(outpath, "wb"); + if (out == NULL) { + return NULL; + } + int err = _PyDumpExecutors(out); + fclose(out); + if (err) { + return NULL; + } + Py_RETURN_NONE; +} + /*[clinic input] sys._getframemodulename @@ -2603,6 +2627,7 @@ static PyMethodDef sys_methods[] = { #endif SYS__GET_CPU_COUNT_CONFIG_METHODDEF SYS__IS_GIL_ENABLED_METHODDEF + SYS__DUMP_TRACELETS_METHODDEF {NULL, NULL} // sentinel }; From 6ff38fc4e2af8e795dc791be6ea596d2146d4119 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Dec 2024 13:53:47 +0100 Subject: [PATCH 42/46] gh-127870: Detect recursive calls in ctypes _as_parameter_ handling (#127872) --- Lib/test/test_ctypes/test_as_parameter.py | 12 ++++++++-- ...-12-12-16-59-42.gh-issue-127870._NFG-3.rst | 2 ++ Modules/_ctypes/_ctypes.c | 22 ++++++++++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst diff --git a/Lib/test/test_ctypes/test_as_parameter.py b/Lib/test/test_ctypes/test_as_parameter.py index cc62b1a22a3b06..c5e1840b0eb7af 100644 --- a/Lib/test/test_ctypes/test_as_parameter.py +++ b/Lib/test/test_ctypes/test_as_parameter.py @@ -198,8 +198,16 @@ class A: a = A() a._as_parameter_ = a - with self.assertRaises(RecursionError): - c_int.from_param(a) + for c_type in ( + ctypes.c_wchar_p, + ctypes.c_char_p, + ctypes.c_void_p, + ctypes.c_int, # PyCSimpleType + POINT, # CDataType + ): + with self.subTest(c_type=c_type): + with self.assertRaises(RecursionError): + c_type.from_param(a) class AsParamWrapper: diff --git 
a/Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst b/Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst new file mode 100644 index 00000000000000..99b2df00032082 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-12-16-59-42.gh-issue-127870._NFG-3.rst @@ -0,0 +1,2 @@ +Detect recursive calls in ctypes ``_as_parameter_`` handling. +Patch by Victor Stinner. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 34529bce496d88..bb4699884057ba 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -1052,8 +1052,13 @@ CDataType_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = CDataType_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -1843,8 +1848,13 @@ c_wchar_p_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = c_wchar_p_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -1927,8 +1937,13 @@ c_char_p_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = c_char_p_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -2079,8 +2094,13 @@ c_void_p_from_param_impl(PyObject *type, PyTypeObject *cls, PyObject *value) return NULL; } if (as_parameter) { + if (_Py_EnterRecursiveCall(" while processing _as_parameter_")) { + Py_DECREF(as_parameter); + return NULL; + } value = c_void_p_from_param_impl(type, cls, as_parameter); Py_DECREF(as_parameter); + _Py_LeaveRecursiveCall(); return value; } PyErr_Format(PyExc_TypeError, @@ -2447,9 +2467,9 @@ PyCSimpleType_from_param_impl(PyObject *type, PyTypeObject *cls, return NULL; } value = PyCSimpleType_from_param_impl(type, cls, as_parameter); - _Py_LeaveRecursiveCall(); Py_DECREF(as_parameter); Py_XDECREF(exc); + _Py_LeaveRecursiveCall(); return value; } if (exc) { From d05a4e6a0d366b854a3103cae0c941811fd48c4c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Dec 2024 14:23:20 +0100 Subject: [PATCH 43/46] gh-127906: Test the limited C API in test_cppext (#127916) --- Lib/test/test_cppext/__init__.py | 13 ++++++++++--- Lib/test/test_cppext/extension.cpp | 9 +++++++++ Lib/test/test_cppext/setup.py | 6 ++++++ .../2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst | 1 + 4 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst diff --git a/Lib/test/test_cppext/__init__.py b/Lib/test/test_cppext/__init__.py index efd79448c66104..d5195227308fec 100644 --- a/Lib/test/test_cppext/__init__.py +++ b/Lib/test/test_cppext/__init__.py @@ -41,12 +41,17 @@ def test_build_cpp11(self): def test_build_cpp14(self): self.check_build('_testcpp14ext', std='c++14') - def check_build(self, extension_name, std=None): + @support.requires_gil_enabled('incompatible with Free Threading') + def test_build_limited(self): + 
self.check_build('_testcppext_limited', limited=True) + + def check_build(self, extension_name, std=None, limited=False): venv_dir = 'env' with support.setup_venv_with_pip_setuptools_wheel(venv_dir) as python_exe: - self._check_build(extension_name, python_exe, std=std) + self._check_build(extension_name, python_exe, + std=std, limited=limited) - def _check_build(self, extension_name, python_exe, std): + def _check_build(self, extension_name, python_exe, std, limited): pkg_dir = 'pkg' os.mkdir(pkg_dir) shutil.copy(SETUP, os.path.join(pkg_dir, os.path.basename(SETUP))) @@ -56,6 +61,8 @@ def run_cmd(operation, cmd): env = os.environ.copy() if std: env['CPYTHON_TEST_CPP_STD'] = std + if limited: + env['CPYTHON_TEST_LIMITED'] = '1' env['CPYTHON_TEST_EXT_NAME'] = extension_name if support.verbose: print('Run:', ' '.join(map(shlex.quote, cmd))) diff --git a/Lib/test/test_cppext/extension.cpp b/Lib/test/test_cppext/extension.cpp index ab485b629b7788..500d5918145c00 100644 --- a/Lib/test/test_cppext/extension.cpp +++ b/Lib/test/test_cppext/extension.cpp @@ -62,6 +62,7 @@ test_api_casts(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) Py_ssize_t refcnt = Py_REFCNT(obj); assert(refcnt >= 1); +#ifndef Py_LIMITED_API // gh-92138: For backward compatibility, functions of Python C API accepts // "const PyObject*". Check that using it does not emit C++ compiler // warnings. @@ -74,6 +75,7 @@ test_api_casts(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) assert(PyTuple_GET_SIZE(const_obj) == 2); PyObject *one = PyTuple_GET_ITEM(const_obj, 0); assert(PyLong_AsLong(one) == 1); +#endif // gh-92898: StrongRef doesn't inherit from PyObject but has an operator to // cast to PyObject*. @@ -106,6 +108,12 @@ test_unicode(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) } assert(PyUnicode_Check(str)); + + assert(PyUnicode_GetLength(str) == 3); + assert(PyUnicode_ReadChar(str, 0) == 'a'); + assert(PyUnicode_ReadChar(str, 1) == 'b'); + +#ifndef Py_LIMITED_API assert(PyUnicode_GET_LENGTH(str) == 3); // gh-92800: test PyUnicode_READ() @@ -121,6 +129,7 @@ test_unicode(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) assert(PyUnicode_READ(ukind, const_data, 2) == 'c'); assert(PyUnicode_READ_CHAR(str, 1) == 'b'); +#endif Py_DECREF(str); Py_RETURN_NONE; diff --git a/Lib/test/test_cppext/setup.py b/Lib/test/test_cppext/setup.py index d97b238b8d1477..019ff18446a2eb 100644 --- a/Lib/test/test_cppext/setup.py +++ b/Lib/test/test_cppext/setup.py @@ -33,6 +33,7 @@ def main(): cppflags = list(CPPFLAGS) std = os.environ.get("CPYTHON_TEST_CPP_STD", "") module_name = os.environ["CPYTHON_TEST_EXT_NAME"] + limited = bool(os.environ.get("CPYTHON_TEST_LIMITED", "")) cppflags = list(CPPFLAGS) cppflags.append(f'-DMODULE_NAME={module_name}') @@ -59,6 +60,11 @@ def main(): # CC env var overrides sysconfig CC variable in setuptools os.environ['CC'] = cmd + # Define Py_LIMITED_API macro + if limited: + version = sys.hexversion + cppflags.append(f'-DPy_LIMITED_API={version:#x}') + # On Windows, add PCbuild\amd64\ to include and library directories include_dirs = [] library_dirs = [] diff --git a/Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst b/Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst new file mode 100644 index 00000000000000..6f577e741dff7f --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-12-13-13-41-34.gh-issue-127906.NuRHlB.rst @@ -0,0 +1 @@ +Test the limited C API in test_cppext. Patch by Victor Stinner. 
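The recursive ``_as_parameter_`` case handled by gh-127870 above can be reproduced from pure Python; with the guard in place each converter raises :exc:`RecursionError` instead of exhausting the C stack. A minimal illustration (the class name is hypothetical)::

    import ctypes

    class SelfReferential:
        pass

    obj = SelfReferential()
    obj._as_parameter_ = obj   # the conversion hook resolves back to itself

    try:
        ctypes.c_void_p.from_param(obj)
    except RecursionError:
        print("recursion detected")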
From 6446408d426814bf2bc9d3911a91741f04d4bc4e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 Dec 2024 14:24:48 +0100 Subject: [PATCH 44/46] gh-102471, PEP 757: Add PyLong import and export API (#121339) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Sergey B Kirpichev Co-authored-by: Steve Dower Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/c-api/long.rst | 174 ++++++++++++++++++ Doc/data/refcounts.dat | 7 + Doc/whatsnew/3.14.rst | 11 ++ Include/cpython/longintrepr.h | 38 ++++ Lib/test/test_capi/test_long.py | 91 +++++++++ ...-07-03-17-26-53.gh-issue-102471.XpmKYk.rst | 10 + Modules/_testcapi/long.c | 124 +++++++++++++ Objects/longobject.c | 120 ++++++++++++ Tools/c-analyzer/cpython/ignored.tsv | 1 + 9 files changed, 576 insertions(+) create mode 100644 Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index cb12d43d92026f..f48cd07a979f56 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -653,3 +653,177 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.12 + +Export API +^^^^^^^^^^ + +.. versionadded:: next + +.. c:struct:: PyLongLayout + + Layout of an array of "digits" ("limbs" in the GMP terminology), used to + represent absolute value for arbitrary precision integers. + + Use :c:func:`PyLong_GetNativeLayout` to get the native layout of Python + :class:`int` objects, used internally for integers with "big enough" + absolute value. + + See also :data:`sys.int_info` which exposes similar information in Python. + + .. c:member:: uint8_t bits_per_digit + + Bits per digit. For example, a 15 bit digit means that bits 0-14 contain + meaningful information. + + .. c:member:: uint8_t digit_size + + Digit size in bytes. For example, a 15 bit digit will require at least 2 + bytes. + + .. c:member:: int8_t digits_order + + Digits order: + + - ``1`` for most significant digit first + - ``-1`` for least significant digit first + + .. c:member:: int8_t digit_endianness + + Digit endianness: + + - ``1`` for most significant byte first (big endian) + - ``-1`` for least significant byte first (little endian) + + +.. c:function:: const PyLongLayout* PyLong_GetNativeLayout(void) + + Get the native layout of Python :class:`int` objects. + + See the :c:struct:`PyLongLayout` structure. + + The function must not be called before Python initialization nor after + Python finalization. The returned layout is valid until Python is + finalized. The layout is the same for all Python sub-interpreters + in a process, and so it can be cached. + + +.. c:struct:: PyLongExport + + Export of a Python :class:`int` object. + + There are two cases: + + * If :c:member:`digits` is ``NULL``, only use the :c:member:`value` member. + * If :c:member:`digits` is not ``NULL``, use :c:member:`negative`, + :c:member:`ndigits` and :c:member:`digits` members. + + .. c:member:: int64_t value + + The native integer value of the exported :class:`int` object. + Only valid if :c:member:`digits` is ``NULL``. + + .. c:member:: uint8_t negative + + ``1`` if the number is negative, ``0`` otherwise. + Only valid if :c:member:`digits` is not ``NULL``. + + .. c:member:: Py_ssize_t ndigits + + Number of digits in :c:member:`digits` array. + Only valid if :c:member:`digits` is not ``NULL``. + + .. c:member:: const void *digits + + Read-only array of unsigned digits. Can be ``NULL``. + + +.. 
c:function:: int PyLong_Export(PyObject *obj, PyLongExport *export_long) + + Export a Python :class:`int` object. + + *export_long* must point to a :c:struct:`PyLongExport` structure allocated + by the caller. It must not be ``NULL``. + + On success, fill in *\*export_long* and return ``0``. + On error, set an exception and return ``-1``. + + :c:func:`PyLong_FreeExport` must be called when the export is no longer + needed. + + .. impl-detail:: + This function always succeeds if *obj* is a Python :class:`int` object + or a subclass. + + +.. c:function:: void PyLong_FreeExport(PyLongExport *export_long) + + Release the export *export_long* created by :c:func:`PyLong_Export`. + + .. impl-detail:: + Calling :c:func:`PyLong_FreeExport` is optional if *export_long->digits* + is ``NULL``. + + +PyLongWriter API +^^^^^^^^^^^^^^^^ + +The :c:type:`PyLongWriter` API can be used to import an integer. + +.. versionadded:: next + +.. c:struct:: PyLongWriter + + A Python :class:`int` writer instance. + + The instance must be destroyed by :c:func:`PyLongWriter_Finish` or + :c:func:`PyLongWriter_Discard`. + + +.. c:function:: PyLongWriter* PyLongWriter_Create(int negative, Py_ssize_t ndigits, void **digits) + + Create a :c:type:`PyLongWriter`. + + On success, allocate *\*digits* and return a writer. + On error, set an exception and return ``NULL``. + + *negative* is ``1`` if the number is negative, or ``0`` otherwise. + + *ndigits* is the number of digits in the *digits* array. It must be + greater than 0. + + *digits* must not be NULL. + + After a successful call to this function, the caller should fill in the + array of digits *digits* and then call :c:func:`PyLongWriter_Finish` to get + a Python :class:`int`. + The layout of *digits* is described by :c:func:`PyLong_GetNativeLayout`. + + Digits must be in the range [``0``; ``(1 << bits_per_digit) - 1``] + (where the :c:struct:`~PyLongLayout.bits_per_digit` is the number of bits + per digit). + Any unused most significant digits must be set to ``0``. + + Alternately, call :c:func:`PyLongWriter_Discard` to destroy the writer + instance without creating an :class:`~int` object. + + +.. c:function:: PyObject* PyLongWriter_Finish(PyLongWriter *writer) + + Finish a :c:type:`PyLongWriter` created by :c:func:`PyLongWriter_Create`. + + On success, return a Python :class:`int` object. + On error, set an exception and return ``NULL``. + + The function takes care of normalizing the digits and converts the object + to a compact integer if needed. + + The writer instance and the *digits* array are invalid after the call. + + +.. c:function:: void PyLongWriter_Discard(PyLongWriter *writer) + + Discard a :c:type:`PyLongWriter` created by :c:func:`PyLongWriter_Create`. + + *writer* must not be ``NULL``. + + The writer instance and the *digits* array are invalid after the call. 
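
Read together, the export API and the writer API describe a complete round trip: export an :class:`int`, copy its digits, and rebuild an equal object. A minimal sketch of that sequence under the API documented above; the ``copy_int`` helper is an illustrative assumption, not a function added by this patch::

    #include <Python.h>
    #include <string.h>

    /* Return a new int equal to *obj*, going through PyLong_Export()
       and the PyLongWriter API.  Returns a new reference, or NULL with
       an exception set. */
    static PyObject *
    copy_int(PyObject *obj)
    {
        PyLongExport export_long;
        if (PyLong_Export(obj, &export_long) < 0) {
            return NULL;
        }

        if (export_long.digits == NULL) {
            /* Compact case: the value fits into an int64_t. */
            PyObject *res = PyLong_FromInt64(export_long.value);
            PyLong_FreeExport(&export_long);
            return res;
        }

        void *digits;
        PyLongWriter *writer = PyLongWriter_Create(export_long.negative,
                                                   export_long.ndigits,
                                                   &digits);
        if (writer == NULL) {
            PyLong_FreeExport(&export_long);
            return NULL;
        }
        /* Both sides use the native layout, so digits copy over verbatim. */
        memcpy(digits, export_long.digits,
               (size_t)export_long.ndigits
                   * PyLong_GetNativeLayout()->digit_size);
        PyLong_FreeExport(&export_long);
        return PyLongWriter_Finish(writer);
    }

Calling :c:func:`PyLong_FreeExport` on both branches is always safe; the implementation detail above only makes it optional when *digits* is ``NULL``.
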
diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index a043af48ba7a05..e78754e24e23d8 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1299,6 +1299,13 @@ PyLong_GetSign:int::: PyLong_GetSign:PyObject*:v:0: PyLong_GetSign:int*:sign:: +PyLong_Export:int::: +PyLong_Export:PyObject*:obj:0: +PyLong_Export:PyLongExport*:export_long:: + +PyLongWriter_Finish:PyObject*::+1: +PyLongWriter_Finish:PyLongWriter*:writer:: + PyMapping_Check:int::: PyMapping_Check:PyObject*:o:0: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b71d31f9742fe0..5ce398ab93d6b4 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1018,6 +1018,17 @@ New features (Contributed by Victor Stinner in :gh:`107954`.) +* Add a new import and export API for Python :class:`int` objects (:pep:`757`): + + * :c:func:`PyLong_GetNativeLayout`; + * :c:func:`PyLong_Export`; + * :c:func:`PyLong_FreeExport`; + * :c:func:`PyLongWriter_Create`; + * :c:func:`PyLongWriter_Finish`; + * :c:func:`PyLongWriter_Discard`. + + (Contributed by Victor Stinner in :gh:`102471`.) + * Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot for easier superclass identification, which attempts to resolve the `type checking issue `__ mentioned in :pep:`630` diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h index c60ccc463653f9..357477b60d9a5a 100644 --- a/Include/cpython/longintrepr.h +++ b/Include/cpython/longintrepr.h @@ -139,6 +139,44 @@ _PyLong_CompactValue(const PyLongObject *op) #define PyUnstable_Long_CompactValue _PyLong_CompactValue +/* --- Import/Export API -------------------------------------------------- */ + +typedef struct PyLongLayout { + uint8_t bits_per_digit; + uint8_t digit_size; + int8_t digits_order; + int8_t digit_endianness; +} PyLongLayout; + +PyAPI_FUNC(const PyLongLayout*) PyLong_GetNativeLayout(void); + +typedef struct PyLongExport { + int64_t value; + uint8_t negative; + Py_ssize_t ndigits; + const void *digits; + // Member used internally, must not be used for other purpose. 
+ Py_uintptr_t _reserved; +} PyLongExport; + +PyAPI_FUNC(int) PyLong_Export( + PyObject *obj, + PyLongExport *export_long); +PyAPI_FUNC(void) PyLong_FreeExport( + PyLongExport *export_long); + + +/* --- PyLongWriter API --------------------------------------------------- */ + +typedef struct PyLongWriter PyLongWriter; + +PyAPI_FUNC(PyLongWriter*) PyLongWriter_Create( + int negative, + Py_ssize_t ndigits, + void **digits); +PyAPI_FUNC(PyObject*) PyLongWriter_Finish(PyLongWriter *writer); +PyAPI_FUNC(void) PyLongWriter_Discard(PyLongWriter *writer); + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index a77094588a0edf..d45ac75c822ea9 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -10,6 +10,7 @@ NULL = None + class IntSubclass(int): pass @@ -714,5 +715,95 @@ def test_long_asuint64(self): self.check_long_asint(as_uint64, 0, UINT64_MAX, negative_value_error=ValueError) + def test_long_layout(self): + # Test PyLong_GetNativeLayout() + int_info = sys.int_info + layout = _testcapi.get_pylong_layout() + expected = { + 'bits_per_digit': int_info.bits_per_digit, + 'digit_size': int_info.sizeof_digit, + 'digits_order': -1, + 'digit_endianness': -1 if sys.byteorder == 'little' else 1, + } + self.assertEqual(layout, expected) + + def test_long_export(self): + # Test PyLong_Export() + layout = _testcapi.get_pylong_layout() + base = 2 ** layout['bits_per_digit'] + + pylong_export = _testcapi.pylong_export + + # value fits into int64_t + self.assertEqual(pylong_export(0), 0) + self.assertEqual(pylong_export(123), 123) + self.assertEqual(pylong_export(-123), -123) + self.assertEqual(pylong_export(IntSubclass(123)), 123) + + # use an array, doesn't fit into int64_t + self.assertEqual(pylong_export(base**10 * 2 + 1), + (0, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])) + self.assertEqual(pylong_export(-(base**10 * 2 + 1)), + (1, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])) + self.assertEqual(pylong_export(IntSubclass(base**10 * 2 + 1)), + (0, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2])) + + self.assertRaises(TypeError, pylong_export, 1.0) + self.assertRaises(TypeError, pylong_export, 0+1j) + self.assertRaises(TypeError, pylong_export, "abc") + + def test_longwriter_create(self): + # Test PyLongWriter_Create() + layout = _testcapi.get_pylong_layout() + base = 2 ** layout['bits_per_digit'] + + pylongwriter_create = _testcapi.pylongwriter_create + self.assertRaises(ValueError, pylongwriter_create, 0, []) + self.assertRaises(ValueError, pylongwriter_create, -123, []) + self.assertEqual(pylongwriter_create(0, [0]), 0) + self.assertEqual(pylongwriter_create(0, [123]), 123) + self.assertEqual(pylongwriter_create(1, [123]), -123) + self.assertEqual(pylongwriter_create(1, [1, 2]), + -(base * 2 + 1)) + self.assertEqual(pylongwriter_create(0, [1, 2, 3]), + base**2 * 3 + base * 2 + 1) + max_digit = base - 1 + self.assertEqual(pylongwriter_create(0, [max_digit, max_digit, max_digit]), + base**2 * max_digit + base * max_digit + max_digit) + + # normalize + self.assertEqual(pylongwriter_create(0, [123, 0, 0]), 123) + + # test singletons + normalize + for num in (-2, 0, 1, 5, 42, 100): + self.assertIs(pylongwriter_create(bool(num < 0), [abs(num), 0]), + num) + + def to_digits(num): + digits = [] + while True: + num, digit = divmod(num, base) + digits.append(digit) + if not num: + break + return digits + + # round trip: Python int -> export -> Python int + pylong_export = _testcapi.pylong_export + numbers = [*range(0, 10), 12345, 0xdeadbeef, 2**100, 
2**100-1] + numbers.extend(-num for num in list(numbers)) + for num in numbers: + with self.subTest(num=num): + data = pylong_export(num) + if isinstance(data, tuple): + negative, digits = data + else: + value = data + negative = int(value < 0) + digits = to_digits(abs(value)) + self.assertEqual(pylongwriter_create(negative, digits), num, + (negative, digits)) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst b/Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst new file mode 100644 index 00000000000000..c18c159ac87d08 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-07-03-17-26-53.gh-issue-102471.XpmKYk.rst @@ -0,0 +1,10 @@ +Add a new import and export API for Python :class:`int` objects (:pep:`757`): + +* :c:func:`PyLong_GetNativeLayout`; +* :c:func:`PyLong_Export`; +* :c:func:`PyLong_FreeExport`; +* :c:func:`PyLongWriter_Create`; +* :c:func:`PyLongWriter_Finish`; +* :c:func:`PyLongWriter_Discard`. + +Patch by Victor Stinner. diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index ebea09080ef11c..42243023a45768 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -141,6 +141,127 @@ pylong_aspid(PyObject *module, PyObject *arg) } +static PyObject * +layout_to_dict(const PyLongLayout *layout) +{ + return Py_BuildValue("{sisisisi}", + "bits_per_digit", (int)layout->bits_per_digit, + "digit_size", (int)layout->digit_size, + "digits_order", (int)layout->digits_order, + "digit_endianness", (int)layout->digit_endianness); +} + + +static PyObject * +pylong_export(PyObject *module, PyObject *obj) +{ + PyLongExport export_long; + if (PyLong_Export(obj, &export_long) < 0) { + return NULL; + } + + if (export_long.digits == NULL) { + assert(export_long.negative == 0); + assert(export_long.ndigits == 0); + assert(export_long.digits == NULL); + PyObject *res = PyLong_FromInt64(export_long.value); + PyLong_FreeExport(&export_long); + return res; + } + + assert(PyLong_GetNativeLayout()->digit_size == sizeof(digit)); + const digit *export_long_digits = export_long.digits; + + PyObject *digits = PyList_New(0); + if (digits == NULL) { + goto error; + } + for (Py_ssize_t i = 0; i < export_long.ndigits; i++) { + PyObject *item = PyLong_FromUnsignedLong(export_long_digits[i]); + if (item == NULL) { + goto error; + } + + if (PyList_Append(digits, item) < 0) { + Py_DECREF(item); + goto error; + } + Py_DECREF(item); + } + + assert(export_long.value == 0); + PyObject *res = Py_BuildValue("(iN)", export_long.negative, digits); + + PyLong_FreeExport(&export_long); + assert(export_long._reserved == 0); + + return res; + +error: + Py_XDECREF(digits); + PyLong_FreeExport(&export_long); + return NULL; +} + + +static PyObject * +pylongwriter_create(PyObject *module, PyObject *args) +{ + int negative; + PyObject *list; + // TODO(vstinner): write test for negative ndigits and digits==NULL + if (!PyArg_ParseTuple(args, "iO!", &negative, &PyList_Type, &list)) { + return NULL; + } + Py_ssize_t ndigits = PyList_GET_SIZE(list); + + digit *digits = PyMem_Malloc((size_t)ndigits * sizeof(digit)); + if (digits == NULL) { + return PyErr_NoMemory(); + } + + for (Py_ssize_t i = 0; i < ndigits; i++) { + PyObject *item = PyList_GET_ITEM(list, i); + + long num = PyLong_AsLong(item); + if (num == -1 && PyErr_Occurred()) { + goto error; + } + + if (num < 0 || num >= PyLong_BASE) { + PyErr_SetString(PyExc_ValueError, "digit doesn't fit into digit"); + goto error; + } + digits[i] = (digit)num; + } + + void 
*writer_digits; + PyLongWriter *writer = PyLongWriter_Create(negative, ndigits, + &writer_digits); + if (writer == NULL) { + goto error; + } + assert(PyLong_GetNativeLayout()->digit_size == sizeof(digit)); + memcpy(writer_digits, digits, (size_t)ndigits * sizeof(digit)); + PyObject *res = PyLongWriter_Finish(writer); + PyMem_Free(digits); + + return res; + +error: + PyMem_Free(digits); + return NULL; +} + + +static PyObject * +get_pylong_layout(PyObject *module, PyObject *Py_UNUSED(args)) +{ + const PyLongLayout *layout = PyLong_GetNativeLayout(); + return layout_to_dict(layout); +} + + static PyMethodDef test_methods[] = { _TESTCAPI_CALL_LONG_COMPACT_API_METHODDEF {"pylong_fromunicodeobject", pylong_fromunicodeobject, METH_VARARGS}, @@ -148,6 +269,9 @@ static PyMethodDef test_methods[] = { {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, {"pylong_getsign", pylong_getsign, METH_O}, {"pylong_aspid", pylong_aspid, METH_O}, + {"pylong_export", pylong_export, METH_O}, + {"pylongwriter_create", pylongwriter_create, METH_VARARGS}, + {"get_pylong_layout", get_pylong_layout, METH_NOARGS}, {"pylong_ispositive", pylong_ispositive, METH_O}, {"pylong_isnegative", pylong_isnegative, METH_O}, {"pylong_iszero", pylong_iszero, METH_O}, diff --git a/Objects/longobject.c b/Objects/longobject.c index 96d59f542a7c3c..bd7ff68d0899c6 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6750,6 +6750,7 @@ PyUnstable_Long_CompactValue(const PyLongObject* op) { return _PyLong_CompactValue((PyLongObject*)op); } + PyObject* PyLong_FromInt32(int32_t value) { return PyLong_FromNativeBytes(&value, sizeof(value), -1); } @@ -6815,3 +6816,122 @@ int PyLong_AsUInt64(PyObject *obj, uint64_t *value) { LONG_TO_UINT(obj, value, "C uint64_t"); } + + +static const PyLongLayout PyLong_LAYOUT = { + .bits_per_digit = PyLong_SHIFT, + .digits_order = -1, // least significant first + .digit_endianness = PY_LITTLE_ENDIAN ? 
-1 : 1, + .digit_size = sizeof(digit), +}; + + +const PyLongLayout* +PyLong_GetNativeLayout(void) +{ + return &PyLong_LAYOUT; +} + + +int +PyLong_Export(PyObject *obj, PyLongExport *export_long) +{ + if (!PyLong_Check(obj)) { + memset(export_long, 0, sizeof(*export_long)); + PyErr_Format(PyExc_TypeError, "expect int, got %T", obj); + return -1; + } + + // Fast-path: try to convert to a int64_t + int overflow; +#if SIZEOF_LONG == 8 + long value = PyLong_AsLongAndOverflow(obj, &overflow); +#else + // Windows has 32-bit long, so use 64-bit long long instead + long long value = PyLong_AsLongLongAndOverflow(obj, &overflow); +#endif + Py_BUILD_ASSERT(sizeof(value) == sizeof(int64_t)); + // the function cannot fail since obj is a PyLongObject + assert(!(value == -1 && PyErr_Occurred())); + + if (!overflow) { + export_long->value = value; + export_long->negative = 0; + export_long->ndigits = 0; + export_long->digits = NULL; + export_long->_reserved = 0; + } + else { + PyLongObject *self = (PyLongObject*)obj; + export_long->value = 0; + export_long->negative = _PyLong_IsNegative(self); + export_long->ndigits = _PyLong_DigitCount(self); + if (export_long->ndigits == 0) { + export_long->ndigits = 1; + } + export_long->digits = self->long_value.ob_digit; + export_long->_reserved = (Py_uintptr_t)Py_NewRef(obj); + } + return 0; +} + + +void +PyLong_FreeExport(PyLongExport *export_long) +{ + PyObject *obj = (PyObject*)export_long->_reserved; + if (obj) { + export_long->_reserved = 0; + Py_DECREF(obj); + } +} + + +/* --- PyLongWriter API --------------------------------------------------- */ + +PyLongWriter* +PyLongWriter_Create(int negative, Py_ssize_t ndigits, void **digits) +{ + if (ndigits <= 0) { + PyErr_SetString(PyExc_ValueError, "ndigits must be positive"); + goto error; + } + assert(digits != NULL); + + PyLongObject *obj = _PyLong_New(ndigits); + if (obj == NULL) { + goto error; + } + if (negative) { + _PyLong_FlipSign(obj); + } + + *digits = obj->long_value.ob_digit; + return (PyLongWriter*)obj; + +error: + *digits = NULL; + return NULL; +} + + +void +PyLongWriter_Discard(PyLongWriter *writer) +{ + PyLongObject *obj = (PyLongObject *)writer; + assert(Py_REFCNT(obj) == 1); + Py_DECREF(obj); +} + + +PyObject* +PyLongWriter_Finish(PyLongWriter *writer) +{ + PyLongObject *obj = (PyLongObject *)writer; + assert(Py_REFCNT(obj) == 1); + + // Normalize and get singleton if possible + obj = maybe_small_long(long_normalize(obj)); + + return (PyObject*)obj; +} diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 686f3935d91bda..c8c30a7985aa2e 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -319,6 +319,7 @@ Objects/exceptions.c - static_exceptions - Objects/genobject.c - ASYNC_GEN_IGNORED_EXIT_MSG - Objects/genobject.c - NON_INIT_CORO_MSG - Objects/longobject.c - _PyLong_DigitValue - +Objects/longobject.c - PyLong_LAYOUT - Objects/object.c - _Py_SwappedOp - Objects/object.c - _Py_abstract_hack - Objects/object.c - last_final_reftotal - From 8bc18182a7c28f86265c9d82bd0338137480921c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:16:22 +0100 Subject: [PATCH 45/46] gh-127691: add type checks when using `PyUnicodeError` objects (GH-127694) --- Doc/whatsnew/3.14.rst | 6 + ...-12-06-16-53-34.gh-issue-127691.k_Jitp.rst | 3 + Objects/exceptions.c | 216 ++++++++++++------ 3 files changed, 157 insertions(+), 68 deletions(-) create mode 
100644 Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5ce398ab93d6b4..095949242c09d9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1045,6 +1045,12 @@ New features * Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling deferred reference counting, as outlined in :pep:`703`. +* The :ref:`Unicode Exception Objects ` C API + now raises a :exc:`TypeError` if its exception argument is not + a :exc:`UnicodeError` object. + (Contributed by Bénédikt Tran in :gh:`127691`.) + + Porting to Python 3.14 ---------------------- diff --git a/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst new file mode 100644 index 00000000000000..c942ff3d9eda53 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst @@ -0,0 +1,3 @@ +The :ref:`Unicode Exception Objects ` C API +now raises a :exc:`TypeError` if its exception argument is not +a :exc:`UnicodeError` object. Patch by Bénédikt Tran. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 287cbc25305964..6880c24196cbb8 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2668,7 +2668,7 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); static PyObject * -get_string(PyObject *attr, const char *name) +get_bytes(PyObject *attr, const char *name) { if (!attr) { PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); @@ -2748,40 +2748,74 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) return end; } +#define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR)) +#define PyUnicodeError_Check(PTR) \ + PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError) +#define PyUnicodeError_CAST(PTR) \ + (assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR)) + + +static inline int +check_unicode_error_type(PyObject *self, const char *expect_type) +{ + if (!PyUnicodeError_Check(self)) { + PyErr_Format(PyExc_TypeError, + "expecting a %s object, got %T", expect_type, self); + return -1; + } + return 0; +} + + +static inline PyUnicodeErrorObject * +as_unicode_error(PyObject *self, const char *expect_type) +{ + int rc = check_unicode_error_type(self, expect_type); + return rc < 0 ? NULL : _PyUnicodeError_CAST(self); +} + PyObject * -PyUnicodeEncodeError_GetEncoding(PyObject *exc) +PyUnicodeEncodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); } PyObject * -PyUnicodeDecodeError_GetEncoding(PyObject *exc) +PyUnicodeDecodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); } PyObject * -PyUnicodeEncodeError_GetObject(PyObject *exc) +PyUnicodeEncodeError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? 
NULL : get_unicode(exc->object, "object"); } PyObject * -PyUnicodeDecodeError_GetObject(PyObject *exc) +PyUnicodeDecodeError_GetObject(PyObject *self) { - return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_bytes(exc->object, "object"); } PyObject * -PyUnicodeTranslateError_GetObject(PyObject *exc) +PyUnicodeTranslateError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? NULL : get_unicode(exc->object, "object"); } int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + if (exc == NULL) { + return -1; + } PyObject *obj = get_unicode(exc->object, "object"); if (obj == NULL) { return -1; @@ -2796,8 +2830,11 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_bytes(exc->object, "object"); if (obj == NULL) { return -1; } @@ -2809,45 +2846,63 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) int -PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - return PyUnicodeEncodeError_GetStart(exc, start); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { + return -1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); + return 0; } static inline int unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) { - ((PyUnicodeErrorObject *)self)->start = start; + PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); + exc->start = start; return 0; } int -PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int -PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int -PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + return rc < 0 ? 
-1 : unicode_error_set_start_impl(self, start); } int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + if (exc == NULL) { + return -1; + } PyObject *obj = get_unicode(exc->object, "object"); if (obj == NULL) { return -1; @@ -2862,8 +2917,11 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_bytes(exc->object, "object"); if (obj == NULL) { return -1; } @@ -2875,108 +2933,130 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) int -PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - return PyUnicodeEncodeError_GetEnd(exc, end); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { + return -1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); + return 0; } static inline int -unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) +unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; + PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); + exc->end = end; return 0; } int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } int -PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } int -PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } + PyObject * -PyUnicodeEncodeError_GetReason(PyObject *exc) +PyUnicodeEncodeError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } PyObject * -PyUnicodeDecodeError_GetReason(PyObject *exc) +PyUnicodeDecodeError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? 
NULL : get_unicode(exc->reason, "reason"); } PyObject * -PyUnicodeTranslateError_GetReason(PyObject *exc) +PyUnicodeTranslateError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } int -PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } int -PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } int -PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason) +PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } static int -UnicodeError_clear(PyUnicodeErrorObject *self) +UnicodeError_clear(PyObject *self) { - Py_CLEAR(self->encoding); - Py_CLEAR(self->object); - Py_CLEAR(self->reason); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + Py_CLEAR(exc->encoding); + Py_CLEAR(exc->object); + Py_CLEAR(exc->reason); return BaseException_clear((PyBaseExceptionObject *)self); } static void -UnicodeError_dealloc(PyUnicodeErrorObject *self) +UnicodeError_dealloc(PyObject *self) { + PyTypeObject *type = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - UnicodeError_clear(self); - Py_TYPE(self)->tp_free((PyObject *)self); + (void)UnicodeError_clear(self); + type->tp_free(self); } static int -UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg) +UnicodeError_traverse(PyObject *self, visitproc visit, void *arg) { - Py_VISIT(self->encoding); - Py_VISIT(self->object); - Py_VISIT(self->reason); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + Py_VISIT(exc->encoding); + Py_VISIT(exc->object); + Py_VISIT(exc->reason); return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); } @@ -3015,7 +3095,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; @@ -3027,7 +3107,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeEncodeError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; PyObject *encoding_str = NULL; @@ -3135,7 +3215,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) } } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); 
Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->object, object /* already a strong reference */); exc->start = start; @@ -3147,7 +3227,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeDecodeError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; PyObject *encoding_str = NULL; @@ -3236,7 +3316,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; exc->end = end; @@ -3248,7 +3328,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeTranslateError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; From 5dd775bed086909722ec7014a7c4f77a35f74a80 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 14 Dec 2024 01:21:46 +0900 Subject: [PATCH 46/46] gh-126024: unicodeobject: optimize find_first_nonascii (GH-127790) Remove 1 branch. --- Objects/unicodeobject.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 33c4747bbef488..b7aeb06d32bcec 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5077,21 +5077,24 @@ load_unaligned(const unsigned char *p, size_t size) static Py_ssize_t find_first_nonascii(const unsigned char *start, const unsigned char *end) { + // The search is done in `size_t` chunks. + // The start and end might not be aligned at `size_t` boundaries, + // so they're handled specially. + const unsigned char *p = start; if (end - start >= SIZEOF_SIZE_T) { - const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); + // Avoid unaligned read. #if PY_LITTLE_ENDIAN && HAVE_CTZ - if (p < p2) { - size_t u; - memcpy(&u, p, sizeof(size_t)); - u &= ASCII_CHAR_MASK; - if (u) { - return (ctz(u) - 7) / 8; - } - p = p2; + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; + if (u) { + return (ctz(u) - 7) / 8; } + p = _Py_ALIGN_DOWN(p + SIZEOF_SIZE_T, SIZEOF_SIZE_T); #else /* PY_LITTLE_ENDIAN && HAVE_CTZ */ + const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); while (p < p2) { if (*p & 0x80) { return p - start; @@ -5099,6 +5102,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) p++; } #endif + const unsigned char *e = end - SIZEOF_SIZE_T; while (p <= e) { size_t u = (*(const size_t *)p) & ASCII_CHAR_MASK; @@ -5115,6 +5119,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) } } #if PY_LITTLE_ENDIAN && HAVE_CTZ + assert((end - p) < SIZEOF_SIZE_T); // we can not use *(const size_t*)p to avoid buffer overrun. size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK; if (u) {