diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 7dbbe71b2131e7..4ef543d7369734 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -104,7 +104,7 @@ jobs: # The `find` line is required as a result of https://github.com/actions/runner-images/issues/9966. # This is a bug in the macOS runner image where the pre-installed Python is installed in the same - # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes + # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes # the symlink to the pre-installed Python so that the Homebrew Python is used instead. - name: Native macOS if: runner.os == 'macOS' diff --git a/Doc/Makefile b/Doc/Makefile index 22e43ee3e542ee..4a704ad58b33d3 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -144,7 +144,7 @@ pydoc-topics: build .PHONY: gettext gettext: BUILDER = gettext -gettext: SPHINXOPTS += -d build/doctrees-gettext +gettext: override SPHINXOPTS := -d build/doctrees-gettext $(SPHINXOPTS) gettext: build .PHONY: htmlview diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index 86d3967d9fb577..444b3456f051d8 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -529,19 +529,19 @@ The following functions and structs are used to create The following “offset” fields cannot be set using :c:type:`PyType_Slot`: - * :c:member:`~PyTypeObject.tp_weaklistoffset` - (use :c:macro:`Py_TPFLAGS_MANAGED_WEAKREF` instead if possible) - * :c:member:`~PyTypeObject.tp_dictoffset` - (use :c:macro:`Py_TPFLAGS_MANAGED_DICT` instead if possible) - * :c:member:`~PyTypeObject.tp_vectorcall_offset` - (use ``"__vectorcalloffset__"`` in - :ref:`PyMemberDef `) - - If it is not possible to switch to a ``MANAGED`` flag (for example, - for vectorcall or to support Python older than 3.12), specify the - offset in :c:member:`Py_tp_members `. - See :ref:`PyMemberDef documentation ` - for details. + * :c:member:`~PyTypeObject.tp_weaklistoffset` + (use :c:macro:`Py_TPFLAGS_MANAGED_WEAKREF` instead if possible) + * :c:member:`~PyTypeObject.tp_dictoffset` + (use :c:macro:`Py_TPFLAGS_MANAGED_DICT` instead if possible) + * :c:member:`~PyTypeObject.tp_vectorcall_offset` + (use ``"__vectorcalloffset__"`` in + :ref:`PyMemberDef `) + + If it is not possible to switch to a ``MANAGED`` flag (for example, + for vectorcall or to support Python older than 3.12), specify the + offset in :c:member:`Py_tp_members `. + See :ref:`PyMemberDef documentation ` + for details. The following internal fields cannot be set at all when creating a heap type: @@ -557,20 +557,18 @@ The following functions and structs are used to create To avoid issues, use the *bases* argument of :c:func:`PyType_FromSpecWithBases` instead. - .. versionchanged:: 3.9 - - Slots in :c:type:`PyBufferProcs` may be set in the unlimited API. + .. versionchanged:: 3.9 + Slots in :c:type:`PyBufferProcs` may be set in the unlimited API. - .. versionchanged:: 3.11 - :c:member:`~PyBufferProcs.bf_getbuffer` and - :c:member:`~PyBufferProcs.bf_releasebuffer` are now available - under the :ref:`limited API `. + .. versionchanged:: 3.11 + :c:member:`~PyBufferProcs.bf_getbuffer` and + :c:member:`~PyBufferProcs.bf_releasebuffer` are now available + under the :ref:`limited API `. - .. versionchanged:: 3.14 - - The field :c:member:`~PyTypeObject.tp_vectorcall` can now set - using ``Py_tp_vectorcall``. See the field's documentation - for details. + .. 
versionchanged:: 3.14 + The field :c:member:`~PyTypeObject.tp_vectorcall` can now set + using ``Py_tp_vectorcall``. See the field's documentation + for details. .. c:member:: void *pfunc diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index d49fd7540a5162..7d8cc0adbc2df0 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -180,7 +180,7 @@ PyCapsule_IsValid:const char*:name:: PyCapsule_New:PyObject*::+1: PyCapsule_New:void*:pointer:: PyCapsule_New:const char *:name:: -PyCapsule_New::void (* destructor)(PyObject* ):: +PyCapsule_New:void (*)(PyObject *):destructor:: PyCapsule_SetContext:int::: PyCapsule_SetContext:PyObject*:self:0: @@ -349,11 +349,11 @@ PyComplex_CheckExact:int::: PyComplex_CheckExact:PyObject*:p:0: PyComplex_FromCComplex:PyObject*::+1: -PyComplex_FromCComplex::Py_complex v:: +PyComplex_FromCComplex:Py_complex:v:: PyComplex_FromDoubles:PyObject*::+1: -PyComplex_FromDoubles::double real:: -PyComplex_FromDoubles::double imag:: +PyComplex_FromDoubles:double:real:: +PyComplex_FromDoubles:double:imag:: PyComplex_ImagAsDouble:double::: PyComplex_ImagAsDouble:PyObject*:op:0: @@ -620,7 +620,9 @@ PyErr_GetExcInfo:PyObject**:pvalue:+1: PyErr_GetExcInfo:PyObject**:ptraceback:+1: PyErr_GetRaisedException:PyObject*::+1: -PyErr_SetRaisedException:::: + +PyErr_SetRaisedException:void::: +PyErr_SetRaisedException:PyObject *:exc:0:stolen PyErr_GivenExceptionMatches:int::: PyErr_GivenExceptionMatches:PyObject*:given:0: @@ -640,9 +642,9 @@ PyErr_NewExceptionWithDoc:PyObject*:dict:0: PyErr_NoMemory:PyObject*::null: PyErr_NormalizeException:void::: -PyErr_NormalizeException:PyObject**:exc::??? -PyErr_NormalizeException:PyObject**:val::??? -PyErr_NormalizeException:PyObject**:tb::??? +PyErr_NormalizeException:PyObject**:exc:+1:??? +PyErr_NormalizeException:PyObject**:val:+1:??? +PyErr_NormalizeException:PyObject**:tb:+1:??? 
PyErr_Occurred:PyObject*::0: @@ -1314,7 +1316,7 @@ PyMapping_GetItemString:const char*:key:: PyMapping_HasKey:int::: PyMapping_HasKey:PyObject*:o:0: -PyMapping_HasKey:PyObject*:key:: +PyMapping_HasKey:PyObject*:key:0: PyMapping_HasKeyString:int::: PyMapping_HasKeyString:PyObject*:o:0: @@ -1474,7 +1476,7 @@ PyModule_GetState:void*::: PyModule_GetState:PyObject*:module:0: PyModule_New:PyObject*::+1: -PyModule_New::char* name:: +PyModule_New:char*:name:: PyModule_NewObject:PyObject*::+1: PyModule_NewObject:PyObject*:name:+1: @@ -1984,10 +1986,10 @@ PyRun_StringFlags:PyObject*:locals:0: PyRun_StringFlags:PyCompilerFlags*:flags:: PySeqIter_Check:int::: -PySeqIter_Check::op:: +PySeqIter_Check:PyObject *:op:0: PySeqIter_New:PyObject*::+1: -PySeqIter_New:PyObject*:seq:: +PySeqIter_New:PyObject*:seq:0: PySequence_Check:int::: PySequence_Check:PyObject*:o:0: @@ -2421,7 +2423,7 @@ PyUnicode_GET_LENGTH:PyObject*:o:0: PyUnicode_KIND:int::: PyUnicode_KIND:PyObject*:o:0: -PyUnicode_MAX_CHAR_VALUE:::: +PyUnicode_MAX_CHAR_VALUE:Py_UCS4::: PyUnicode_MAX_CHAR_VALUE:PyObject*:o:0: Py_UNICODE_ISALNUM:int::: @@ -2488,7 +2490,7 @@ PyUnicode_FromWideChar:const wchar_t*:w:: PyUnicode_FromWideChar:Py_ssize_t:size:: PyUnicode_AsWideChar:Py_ssize_t::: -PyUnicode_AsWideChar:PyObject*:*unicode:0: +PyUnicode_AsWideChar:PyObject*:unicode:0: PyUnicode_AsWideChar:wchar_t*:w:: PyUnicode_AsWideChar:Py_ssize_t:size:: @@ -2541,7 +2543,7 @@ PyUnicode_AsUTF8String:PyObject*:unicode:0: PyUnicode_AsUTF8AndSize:const char*::: PyUnicode_AsUTF8AndSize:PyObject*:unicode:0: -PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:0: +PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:: PyUnicode_AsUTF8:const char*::: PyUnicode_AsUTF8:PyObject*:unicode:0: @@ -2864,13 +2866,13 @@ PyUnicodeDecodeError_SetStart:PyObject*:exc:0: PyUnicodeDecodeError_SetStart:Py_ssize_t:start:: PyWeakref_Check:int::: -PyWeakref_Check:PyObject*:ob:: +PyWeakref_Check:PyObject*:ob:0: PyWeakref_CheckProxy:int::: -PyWeakref_CheckProxy:PyObject*:ob:: +PyWeakref_CheckProxy:PyObject*:ob:0: PyWeakref_CheckRef:int::: -PyWeakref_CheckRef:PyObject*:ob:: +PyWeakref_CheckRef:PyObject*:ob:0: PyWeakref_GET_OBJECT:PyObject*::0: PyWeakref_GET_OBJECT:PyObject*:ref:0: diff --git a/Doc/library/asyncio-sync.rst b/Doc/library/asyncio-sync.rst index 3cf8e2737e85dc..77c2e97da11990 100644 --- a/Doc/library/asyncio-sync.rst +++ b/Doc/library/asyncio-sync.rst @@ -259,8 +259,8 @@ Condition Note that a task *may* return from this call spuriously, which is why the caller should always re-check the state - and be prepared to :meth:`wait` again. For this reason, you may - prefer to use :meth:`wait_for` instead. + and be prepared to :meth:`~Condition.wait` again. For this reason, you may + prefer to use :meth:`~Condition.wait_for` instead. .. coroutinemethod:: wait_for(predicate) @@ -268,7 +268,7 @@ Condition The predicate must be a callable which result will be interpreted as a boolean value. The method will repeatedly - :meth:`wait` until the predicate evaluates to *true*. The final value is the + :meth:`~Condition.wait` until the predicate evaluates to *true*. The final value is the return value. @@ -434,7 +434,7 @@ Barrier .. coroutinemethod:: abort() Put the barrier into a broken state. This causes any active or future - calls to :meth:`wait` to fail with the :class:`BrokenBarrierError`. + calls to :meth:`~Barrier.wait` to fail with the :class:`BrokenBarrierError`. Use this for example if one of the tasks needs to abort, to avoid infinite waiting tasks. 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 869a47c1261293..f9322da3d4fbb0 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -576,6 +576,14 @@ sys from other interpreters than the one it's called in. +tkinter +------- + +* Make tkinter widget methods :meth:`!after` and :meth:`!after_idle` accept + arguments passed by keyword. + (Contributed by Zhikang Yan in :gh:`126899`.) + + unicodedata ----------- diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index f1ca54839fbc38..2ae48002d720e9 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -99,6 +99,8 @@ typedef struct _gc_stats { uint64_t collections; uint64_t object_visits; uint64_t objects_collected; + uint64_t objects_transitively_reachable; + uint64_t objects_not_transitively_reachable; } GCStats; typedef struct _uop_stats { diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 8c0100390d036e..b786c5f49e9831 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame { _PyStackRef *stackpointer; uint16_t return_offset; /* Only relevant during a function call */ char owner; + char visited; /* Locals and stack */ _PyStackRef localsplus[1]; } _PyInterpreterFrame; @@ -207,6 +208,7 @@ _PyFrame_Initialize( #endif frame->return_offset = 0; frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; for (int i = null_locals_from; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = PyStackRef_NULL; @@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int frame->instr_ptr = _PyCode_CODE(code); #endif frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; frame->return_offset = 0; #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 479fe10d00066d..4ff34bf8ead7d0 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -10,11 +10,11 @@ extern "C" { /* GC information is stored BEFORE the object structure. */ typedef struct { - // Pointer to next object in the list. + // Tagged pointer to next object in the list. // 0 means the object is not tracked uintptr_t _gc_next; - // Pointer to previous object in the list. + // Tagged pointer to previous object in the list. // Lowest two bits are used for flags documented later. uintptr_t _gc_prev; } PyGC_Head; @@ -284,6 +284,11 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +enum _GCPhase { + GC_PHASE_MARK = 0, + GC_PHASE_COLLECT = 1 +}; + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked * via their gc headers' gc_prev pointers. */ @@ -311,6 +316,7 @@ struct _gc_runtime_state { Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; + int phase; #ifdef Py_GIL_DISABLED /* This is the number of objects that survived the last full diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 2c666f9be4bd79..f03e484f5ef8b0 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -10,6 +10,9 @@ extern "C" { PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); +// _PyList_GetItemRef should be used only when the object is known as a list +// because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does. 
+extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 34d835a7f84ee7..c52ed8f14707ba 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -471,8 +471,8 @@ static inline void _PyObject_GC_TRACK( PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); - /* Young objects will be moved into the visited space during GC, so set the bit here */ - gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space; + uintptr_t not_visited = 1 ^ interp->gc.visited_space; + gc->_gc_next = ((uintptr_t)generation0) | not_visited; generation0->_gc_prev = (uintptr_t)gc; #endif } diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5ce172856e1b19..d63c8df8ca6690 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1952,7 +1952,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, - [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 9f6748945bab36..1260b957ce9482 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -137,6 +137,7 @@ extern PyTypeObject _PyExc_MemoryError; { .threshold = 0, }, \ }, \ .work_to_do = -5000, \ + .phase = GC_PHASE_MARK, \ }, \ .qsbr = { \ .wr_seq = QSBR_INITIAL, \ diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1c1f478c3833c8..1825bb3a5abc80 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -84,7 +84,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/InternalDocs/README.md b/InternalDocs/README.md index f6aa3db3b384af..8cdd06d189f362 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -1,4 +1,3 @@ - # CPython Internals Documentation The documentation in this folder is intended for CPython maintainers. diff --git a/InternalDocs/adaptive.md b/InternalDocs/adaptive.md index 4ae9e85b387f39..7cfa8e52310460 100644 --- a/InternalDocs/adaptive.md +++ b/InternalDocs/adaptive.md @@ -96,6 +96,7 @@ quality of specialization and keeping the overhead of specialization low. Specialized instructions must be fast. 
In order to be fast, specialized instructions should be tailored for a particular set of values that allows them to: + 1. Verify that incoming value is part of that set with low overhead. 2. Perform the operation quickly. @@ -107,9 +108,11 @@ For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` dictionaries that have a keys with the expected version. This can be tested quickly: + * `globals->keys->dk_version == expected_version` and the operation can be performed quickly: + * `value = entries[cache->index].me_value;`. Because it is impossible to measure the performance of an instruction without @@ -122,10 +125,11 @@ base instruction. ### Implementation of specialized instructions In general, specialized instructions should be implemented in two parts: + 1. A sequence of guards, each of the form - `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. + `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. 2. The operation, which should ideally have no branches and - a minimum number of dependent memory accesses. + a minimum number of dependent memory accesses. In practice, the parts may overlap, as data required for guards can be re-used in the operation. diff --git a/InternalDocs/changing_grammar.md b/InternalDocs/changing_grammar.md index 1a5eebdc1418dc..c6b895135a360d 100644 --- a/InternalDocs/changing_grammar.md +++ b/InternalDocs/changing_grammar.md @@ -32,7 +32,7 @@ Below is a checklist of things that may need to change. [`Include/internal/pycore_ast.h`](../Include/internal/pycore_ast.h) and [`Python/Python-ast.c`](../Python/Python-ast.c). -* [`Parser/lexer/`](../Parser/lexer/) contains the tokenization code. +* [`Parser/lexer/`](../Parser/lexer) contains the tokenization code. This is where you would add a new type of comment or string literal, for example. * [`Python/ast.c`](../Python/ast.c) will need changes to validate AST objects @@ -60,4 +60,4 @@ Below is a checklist of things that may need to change. to the tokenizer. * Documentation must be written! Specifically, one or more of the pages in - [`Doc/reference/`](../Doc/reference/) will need to be updated. + [`Doc/reference/`](../Doc/reference) will need to be updated. diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md index bee4a9d0a08915..d4e28c6b238b48 100644 --- a/InternalDocs/code_objects.md +++ b/InternalDocs/code_objects.md @@ -1,4 +1,3 @@ - # Code objects A `CodeObject` is a builtin Python type that represents a compiled executable, @@ -43,7 +42,7 @@ so a compact format is very important. Note that traceback objects don't store all this information -- they store the start line number, for backward compatibility, and the "last instruction" value. The rest can be computed from the last instruction (`tb_lasti`) with the help of the -locations table. For Python code, there is a convenience method +locations table. For Python code, there is a convenience method (`codeobject.co_positions`)[https://docs.python.org/dev/reference/datamodel.html#codeobject.co_positions] which returns an iterator of `({line}, {endline}, {column}, {endcolumn})` tuples, one per instruction. @@ -75,9 +74,11 @@ returned by the `co_positions()` iterator. > See [`Objects/lnotab_notes.txt`](../Objects/lnotab_notes.txt) for more details. `co_linetable` consists of a sequence of location entries. -Each entry starts with a byte with the most significant bit set, followed by zero or more bytes with the most significant bit unset. 
+Each entry starts with a byte with the most significant bit set, followed by +zero or more bytes with the most significant bit unset. Each entry contains the following information: + * The number of code units covered by this entry (length) * The start line * The end line @@ -86,54 +87,88 @@ Each entry contains the following information: The first byte has the following format: -Bit 7 | Bits 3-6 | Bits 0-2 - ---- | ---- | ---- - 1 | Code | Length (in code units) - 1 +| Bit 7 | Bits 3-6 | Bits 0-2 | +|-------|----------|----------------------------| +| 1 | Code | Length (in code units) - 1 | The codes are enumerated in the `_PyCodeLocationInfoKind` enum. -## Variable-length integer encodings +### Variable-length integer encodings -Integers are often encoded using a variable-length integer encoding +Integers are often encoded using a variable length integer encoding -### Unsigned integers (`varint`) +#### Unsigned integers (`varint`) Unsigned integers are encoded in 6-bit chunks, least significant first. Each chunk but the last has bit 6 set. For example: * 63 is encoded as `0x3f` -* 200 is encoded as `0x48`, `0x03` +* 200 is encoded as `0x48`, `0x03` since ``200 = (0x03 << 6) | 0x48``. + +The following helper can be used to convert an integer into a `varint`: + +```py +def encode_varint(s): + ret = [] + while s >= 64: + ret.append(((s & 0x3F) | 0x40) & 0x3F) + s >>= 6 + ret.append(s & 0x3F) + return bytes(ret) +``` + +To convert a `varint` into an unsigned integer: + +```py +def decode_varint(chunks): + ret = 0 + for chunk in reversed(chunks): + ret = (ret << 6) | chunk + return ret +``` -### Signed integers (`svarint`) +#### Signed integers (`svarint`) Signed integers are encoded by converting them to unsigned integers, using the following function: -```Python -def convert(s): + +```py +def svarint_to_varint(s): if s < 0: - return ((-s)<<1) | 1 + return ((-s) << 1) | 1 else: - return (s<<1) + return s << 1 +``` + +To convert a `varint` into a signed integer: + +```py +def varint_to_svarint(uval): + return -(uval >> 1) if uval & 1 else (uval >> 1) ``` -*Location entries* +### Location entries The meaning of the codes and the following bytes are as follows: -Code | Meaning | Start line | End line | Start column | End column - ---- | ---- | ---- | ---- | ---- | ---- - 0-9 | Short form | Δ 0 | Δ 0 | See below | See below - 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte - 13 | No column info | Δ svarint | Δ 0 | None | None - 14 | Long form | Δ svarint | Δ varint | varint | varint - 15 | No location | None | None | None | None +| Code | Meaning | Start line | End line | Start column | End column | +|-------|----------------|---------------|----------|---------------|---------------| +| 0-9 | Short form | Δ 0 | Δ 0 | See below | See below | +| 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte | +| 13 | No column info | Δ svarint | Δ 0 | None | None | +| 14 | Long form | Δ svarint | Δ varint | varint | varint | +| 15 | No location | None | None | None | None | The Δ means the value is encoded as a delta from another value: + * Start line: Delta from the previous start line, or `co_firstlineno` for the first entry. -* End line: Delta from the start line +* End line: Delta from the start line. + +### The short forms -*The short forms* +Codes 0-9 are the short forms. The short form consists of two bytes, +the second byte holding additional column information. The code is the +start column divided by 8 (and rounded down). 
-Codes 0-9 are the short forms. The short form consists of two bytes, the second byte holding additional column information. The code is the start column divided by 8 (and rounded down). * Start column: `(code*8) + ((second_byte>>4)&7)` * End column: `start_column + (second_byte&15)` diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index ed4cfb23ca51f7..9e99f348acbd8f 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -1,4 +1,3 @@ - Compiler design =============== @@ -7,8 +6,8 @@ Abstract In CPython, the compilation from source code to bytecode involves several steps: -1. Tokenize the source code [Parser/lexer/](../Parser/lexer/) - and [Parser/tokenizer/](../Parser/tokenizer/). +1. Tokenize the source code [Parser/lexer/](../Parser/lexer) + and [Parser/tokenizer/](../Parser/tokenizer). 2. Parse the stream of tokens into an Abstract Syntax Tree [Parser/parser.c](../Parser/parser.c). 3. Transform AST into an instruction sequence @@ -134,9 +133,8 @@ this case) a `stmt_ty` struct with the appropriate initialization. The `FunctionDef()` constructor function sets 'kind' to `FunctionDef_kind` and initializes the *name*, *args*, *body*, and *attributes* fields. -See also -[Green Tree Snakes - The missing Python AST docs](https://greentreesnakes.readthedocs.io/en/latest) - by Thomas Kluyver. +See also [Green Tree Snakes - The missing Python AST docs]( +https://greentreesnakes.readthedocs.io/en/latest) by Thomas Kluyver. Memory management ================= @@ -260,12 +258,12 @@ manually -- `generic`, `identifier` and `int`. These types are found in [Include/internal/pycore_asdl.h](../Include/internal/pycore_asdl.h). Functions and macros for creating `asdl_xx_seq *` types are as follows: -`_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)` - Allocate memory for an `asdl_generic_seq` of the specified length -`_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)` - Allocate memory for an `asdl_identifier_seq` of the specified length -`_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)` - Allocate memory for an `asdl_int_seq` of the specified length +* `_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)`: + Allocate memory for an `asdl_generic_seq` of the specified length +* `_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)`: + Allocate memory for an `asdl_identifier_seq` of the specified length +* `_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)`: + Allocate memory for an `asdl_int_seq` of the specified length In addition to the three types mentioned above, some ASDL sequence types are automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py) and found in @@ -273,20 +271,20 @@ automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py) and found in Macros for using both manually defined and automatically generated ASDL sequence types are as follows: -`asdl_seq_GET(asdl_xx_seq *, int)` - Get item held at a specific position in an `asdl_xx_seq` -`asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)` - Set a specific index in an `asdl_xx_seq` to the specified value +* `asdl_seq_GET(asdl_xx_seq *, int)`: + Get item held at a specific position in an `asdl_xx_seq` +* `asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)`: + Set a specific index in an `asdl_xx_seq` to the specified value -Untyped counterparts exist for some of the typed macros. These are useful +Untyped counterparts exist for some of the typed macros. 
These are useful when a function needs to manipulate a generic ASDL sequence: -`asdl_seq_GET_UNTYPED(asdl_seq *, int)` - Get item held at a specific position in an `asdl_seq` -`asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)` - Set a specific index in an `asdl_seq` to the specified value -`asdl_seq_LEN(asdl_seq *)` - Return the length of an `asdl_seq` or `asdl_xx_seq` +* `asdl_seq_GET_UNTYPED(asdl_seq *, int)`: + Get item held at a specific position in an `asdl_seq` +* `asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)`: + Set a specific index in an `asdl_seq` to the specified value +* `asdl_seq_LEN(asdl_seq *)`: + Return the length of an `asdl_seq` or `asdl_xx_seq` Note that typed macros and functions are recommended over their untyped counterparts. Typed macros carry out checks in debug mode and aid @@ -379,33 +377,33 @@ arguments to a node that used the '*' modifier). Emission of bytecode is handled by the following macros: -* `ADDOP(struct compiler *, location, int)` - add a specified opcode -* `ADDOP_IN_SCOPE(struct compiler *, location, int)` - like `ADDOP`, but also exits current scope; used for adding return value - opcodes in lambdas and closures -* `ADDOP_I(struct compiler *, location, int, Py_ssize_t)` - add an opcode that takes an integer argument -* `ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)` - add an opcode with the proper argument based on the position of the - specified PyObject in PyObject sequence object, but with no handling of - mangled names; used for when you - need to do named lookups of objects such as globals, consts, or - parameters where name mangling is not possible and the scope of the - name is known; *TYPE* is the name of PyObject sequence - (`names` or `varnames`) -* `ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)` - just like `ADDOP_O`, but steals a reference to PyObject -* `ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)` - just like `ADDOP_O`, but name mangling is also handled; used for - attribute loading or importing based on name -* `ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)` - add the `LOAD_CONST` opcode with the proper argument based on the - position of the specified PyObject in the consts table. 
-* `ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)` - just like `ADDOP_LOAD_CONST_NEW`, but steals a reference to PyObject -* `ADDOP_JUMP(struct compiler *, location, int, basicblock *)` - create a jump to a basic block +* `ADDOP(struct compiler *, location, int)`: + add a specified opcode +* `ADDOP_IN_SCOPE(struct compiler *, location, int)`: + like `ADDOP`, but also exits current scope; used for adding return value + opcodes in lambdas and closures +* `ADDOP_I(struct compiler *, location, int, Py_ssize_t)`: + add an opcode that takes an integer argument +* `ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)`: + add an opcode with the proper argument based on the position of the + specified PyObject in PyObject sequence object, but with no handling of + mangled names; used for when you + need to do named lookups of objects such as globals, consts, or + parameters where name mangling is not possible and the scope of the + name is known; *TYPE* is the name of PyObject sequence + (`names` or `varnames`) +* `ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)`: + just like `ADDOP_O`, but steals a reference to PyObject +* `ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)`: + just like `ADDOP_O`, but name mangling is also handled; used for + attribute loading or importing based on name +* `ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)`: + add the `LOAD_CONST` opcode with the proper argument based on the + position of the specified PyObject in the consts table. +* `ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)`: + just like `ADDOP_LOAD_CONST_NEW`, but steals a reference to PyObject +* `ADDOP_JUMP(struct compiler *, location, int, basicblock *)`: + create a jump to a basic block The `location` argument is a struct with the source location to be associated with this instruction. It is typically extracted from an @@ -433,7 +431,7 @@ Finally, the sequence of pseudo-instructions is converted into actual bytecode. This includes transforming pseudo instructions into actual instructions, converting jump targets from logical labels to relative offsets, and construction of the [exception table](exception_handling.md) and -[locations table](locations.md). +[locations table](code_objects.md#source-code-locations). The bytecode and tables are then wrapped into a `PyCodeObject` along with additional metadata, including the `consts` and `names` arrays, information about function reference to the source code (filename, etc). All of this is implemented by @@ -453,7 +451,7 @@ in [Python/ceval.c](../Python/ceval.c). Important files =============== -* [Parser/](../Parser/) +* [Parser/](../Parser) * [Parser/Python.asdl](../Parser/Python.asdl): ASDL syntax file. @@ -534,7 +532,7 @@ Important files * [Python/instruction_sequence.c](../Python/instruction_sequence.c): A data structure representing a sequence of bytecode-like pseudo-instructions. -* [Include/](../Include/) +* [Include/](../Include) * [Include/cpython/code.h](../Include/cpython/code.h) : Header file for [Objects/codeobject.c](../Objects/codeobject.c); @@ -556,7 +554,7 @@ Important files : Declares `_PyAST_Validate()` external (from [Python/ast.c](../Python/ast.c)). * [Include/internal/pycore_symtable.h](../Include/internal/pycore_symtable.h) - : Header for [Python/symtable.c](../Python/symtable.c). + : Header for [Python/symtable.c](../Python/symtable.c). `struct symtable` and `PySTEntryObject` are defined here. 
* [Include/internal/pycore_parser.h](../Include/internal/pycore_parser.h) @@ -570,7 +568,7 @@ Important files by [Tools/cases_generator/opcode_id_generator.py](../Tools/cases_generator/opcode_id_generator.py). -* [Objects/](../Objects/) +* [Objects/](../Objects) * [Objects/codeobject.c](../Objects/codeobject.c) : Contains PyCodeObject-related code. @@ -579,7 +577,7 @@ Important files : Contains the `frame_setlineno()` function which should determine whether it is allowed to make a jump between two points in a bytecode. -* [Lib/](../Lib/) +* [Lib/](../Lib) * [Lib/opcode.py](../Lib/opcode.py) : opcode utilities exposed to Python. @@ -591,7 +589,7 @@ Important files Objects ======= -* [Locations](locations.md): Describes the location table +* [Locations](code_objects.md#source-code-locations): Describes the location table * [Frames](frames.md): Describes frames and the frame stack * [Objects/object_layout.md](../Objects/object_layout.md): Describes object layout for 3.11 and later * [Exception Handling](exception_handling.md): Describes the exception table diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 14066a5864b4da..28589787e1fad7 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -87,10 +87,10 @@ offset of the raising instruction should be pushed to the stack. Handling an exception, once an exception table entry is found, consists of the following steps: - 1. pop values from the stack until it matches the stack depth for the handler. - 2. if `lasti` is true, then push the offset that the exception was raised at. - 3. push the exception to the stack. - 4. jump to the target offset and resume execution. +1. pop values from the stack until it matches the stack depth for the handler. +2. if `lasti` is true, then push the offset that the exception was raised at. +3. push the exception to the stack. +4. jump to the target offset and resume execution. Reraising Exceptions and `lasti` @@ -107,13 +107,12 @@ Format of the exception table ----------------------------- Conceptually, the exception table consists of a sequence of 5-tuples: -``` - 1. `start-offset` (inclusive) - 2. `end-offset` (exclusive) - 3. `target` - 4. `stack-depth` - 5. `push-lasti` (boolean) -``` + +1. `start-offset` (inclusive) +2. `end-offset` (exclusive) +3. `target` +4. `stack-depth` +5. `push-lasti` (boolean) All offsets and lengths are in code units, not bytes. @@ -123,18 +122,19 @@ For it to be searchable quickly, we need to support binary search giving us log( Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: - `start, size, target, depth, push-lasti`. +`start, size, target, depth, push-lasti`. Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each code unit takes 2 bytes. It also happens that depth is generally quite small. So, we need to encode: + ``` - `start` (up to 30 bits) - `size` (up to 30 bits) - `target` (up to 30 bits) - `depth` (up to ~8 bits) - `lasti` (1 bit) +start (up to 30 bits) +size (up to 30 bits) +target (up to 30 bits) +depth (up to ~8 bits) +lasti (1 bit) ``` We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. @@ -145,29 +145,32 @@ The 8 bits of a byte are (msb left) SXdddddd where S is the start bit. 
X is the In addition, we combine `depth` and `lasti` into a single value, `((depth<<1)+lasti)`, before encoding. For example, the exception entry: + ``` - `start`: 20 - `end`: 28 - `target`: 100 - `depth`: 3 - `lasti`: False +start: 20 +end: 28 +target: 100 +depth: 3 +lasti: False ``` is encoded by first converting to the more compact four value form: + ``` - `start`: 20 - `size`: 8 - `target`: 100 - `depth<<1+lasti`: 6 +start: 20 +size: 8 +target: 100 +depth<<1+lasti: 6 ``` which is then encoded as: + ``` - 148 (MSB + 20 for start) - 8 (size) - 65 (Extend bit + 1) - 36 (Remainder of target, 100 == (1<<6)+36) - 6 +148 (MSB + 20 for start) +8 (size) +65 (Extend bit + 1) +36 (Remainder of target, 100 == (1<<6)+36) +6 ``` for a total of five bytes. diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md index 06dc8f0702c3d9..2598873ca98479 100644 --- a/InternalDocs/frames.md +++ b/InternalDocs/frames.md @@ -27,6 +27,7 @@ objects, so are not allocated in the per-thread stack. See `PyGenObject` in ## Layout Each activation record is laid out as: + * Specials * Locals * Stack diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 272a0834cbfe24..08db080a200ea4 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -1,4 +1,3 @@ - Garbage collector design ======================== @@ -117,7 +116,7 @@ general, the collection of all objects tracked by GC is partitioned into disjoin doubly linked list. Between collections, objects are partitioned into "generations", reflecting how often they've survived collection attempts. During collections, the generation(s) being collected are further partitioned into, for example, sets of reachable and unreachable objects. Doubly linked lists -support moving an object from one partition to another, adding a new object, removing an object +support moving an object from one partition to another, adding a new object, removing an object entirely (objects tracked by GC are most often reclaimed by the refcounting system when GC isn't running at all!), and merging partitions, all with a small constant number of pointer updates. With care, they also support iterating over a partition while objects are being added to - and @@ -478,6 +477,45 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. ``` +Optimization: visiting reachable objects +======================================== + +An object cannot be garbage if it can be reached. + +To avoid having to identify reference cycles across the whole heap, we can +reduce the amount of work done considerably by first moving most reachable objects +to the `visited` space. Empirically, most reachable objects can be reached from a +small set of global objects and local variables. +This step does much less work per object, so reduces the time spent +performing garbage collection by at least half. + +> [!NOTE] +> Objects that are not determined to be reachable by this pass are not necessarily +> unreachable. We still need to perform the main algorithm to determine which objects +> are actually unreachable. +We use the same technique of forming a transitive closure as the incremental +collector does to find reachable objects, seeding the list with some global +objects and the currently executing frames. + +This phase moves objects to the `visited` space, as follows: + +1. 
All objects directly referred to by any builtin class, the `sys` module, the `builtins` +module and all objects directly referred to from stack frames are added to a working +set of reachable objects. +2. Until this working set is empty: + 1. Pop an object from the set and move it to the `visited` space + 2. For each object directly reachable from that object: + * If it is not already in `visited` space and it is a GC object, + add it to the working set + + +Before each increment of collection is performed, the stacks are scanned +to check for any new stack frames that have been created since the last +increment. All objects directly referred to from those stack frames are +added to the working set. +Then the above algorithm is repeated, starting from step 2. + + Optimization: reusing fields to save memory =========================================== diff --git a/InternalDocs/generators.md b/InternalDocs/generators.md index d53f0f9bdff4e4..afa8b8f4bb8040 100644 --- a/InternalDocs/generators.md +++ b/InternalDocs/generators.md @@ -1,4 +1,3 @@ - Generators ========== diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md index 4c10cbbed37735..ab149e43471072 100644 --- a/InternalDocs/interpreter.md +++ b/InternalDocs/interpreter.md @@ -1,4 +1,3 @@ - The bytecode interpreter ======================== diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md index 348988b7c2f003..445b866fc0cb96 100644 --- a/InternalDocs/parser.md +++ b/InternalDocs/parser.md @@ -1,4 +1,3 @@ - Guide to the parser =================== @@ -444,15 +443,15 @@ How to regenerate the parser Once you have made the changes to the grammar files, to regenerate the `C` parser (the one used by the interpreter) just execute: -``` - make regen-pegen +```shell +$ make regen-pegen ``` using the `Makefile` in the main directory. If you are on Windows you can use the Visual Studio project files to regenerate the parser or to execute: -``` - ./PCbuild/build.bat --regen +```dos +PCbuild/build.bat --regen ``` The generated parser file is located at [`Parser/parser.c`](../Parser/parser.c). @@ -468,15 +467,15 @@ any modifications to this file (in order to implement new Pegen features) you wi need to regenerate the meta-parser (the parser that parses the grammar files). To do so just execute: -``` - make regen-pegen-metaparser +```shell +$ make regen-pegen-metaparser ``` If you are on Windows you can use the Visual Studio project files to regenerate the parser or to execute: -``` - ./PCbuild/build.bat --regen +```dos +PCbuild/build.bat --regen ``` @@ -516,15 +515,15 @@ be found in the [`Grammar/Tokens`](../Grammar/Tokens) file. If you change this file to add new tokens, make sure to regenerate the files by executing: -``` - make regen-token +```shell +$ make regen-token ``` If you are on Windows you can use the Visual Studio project files to regenerate the tokens or to execute: -``` - ./PCbuild/build.bat --regen +```dos +PCbuild/build.bat --regen ``` How tokens are generated and the rules governing this are completely up to the tokenizer @@ -546,8 +545,8 @@ by default** except for rules with the special marker `memo` after the rule name (and type, if present): ``` - rule_name[typr] (memo): - ... +rule_name[typr] (memo): + ... ``` By selectively turning on memoization for a handful of rules, the parser becomes @@ -593,25 +592,25 @@ are always reserved words, even in positions where they make no sense meaning in context. 
Trying to use a hard keyword as a variable will always fail: -``` - >>> class = 3 - File "", line 1 - class = 3 - ^ - SyntaxError: invalid syntax - >>> foo(class=3) - File "", line 1 - foo(class=3) - ^^^^^ - SyntaxError: invalid syntax +```pycon +>>> class = 3 +File "", line 1 + class = 3 + ^ +SyntaxError: invalid syntax +>>> foo(class=3) +File "", line 1 + foo(class=3) + ^^^^^ +SyntaxError: invalid syntax ``` While soft keywords don't have this limitation if used in a context other the one where they are defined as keywords: -``` - >>> match = 45 - >>> foo(match="Yeah!") +```pycon +>>> match = 45 +>>> foo(match="Yeah!") ``` The `match` and `case` keywords are soft keywords, so that they are @@ -621,21 +620,21 @@ argument names. You can get a list of all keywords defined in the grammar from Python: -``` - >>> import keyword - >>> keyword.kwlist - ['False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break', - 'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', - 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', - 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield'] +```pycon +>>> import keyword +>>> keyword.kwlist +['False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break', +'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', +'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', +'pass', 'raise', 'return', 'try', 'while', 'with', 'yield'] ``` as well as soft keywords: -``` - >>> import keyword - >>> keyword.softkwlist - ['_', 'case', 'match'] +```pycon +>>> import keyword +>>> keyword.softkwlist +['_', 'case', 'match'] ``` > [!CAUTION] @@ -736,7 +735,7 @@ displayed when the error is reported. > rule or not. For example: ``` - $ 42 + $ 42 ``` should trigger the syntax error in the `$` character. If your rule is not correctly defined this @@ -744,7 +743,7 @@ won't happen. As another example, suppose that you try to define a rule to match `print` statements in order to create a better error message and you define it as: ``` - invalid_print: "print" expression +invalid_print: "print" expression ``` This will **seem** to work because the parser will correctly parse `print(something)` because it is valid @@ -756,7 +755,7 @@ will be reported there instead of the `$` character. Generating AST objects ---------------------- -The output of the C parser used by CPython, which is generated from the +The output of the C parser used by CPython, which is generated from the [grammar file](../Grammar/python.gram), is a Python AST object (using C structures). This means that the actions in the grammar file generate AST objects when they succeed. Constructing these objects can be quite cumbersome @@ -798,7 +797,7 @@ Check the contents of these files to know which is the best place for new tests, depending on the nature of the new feature you are adding. Tests for the parser generator itself can be found in the -[test_peg_generator](../Lib/test_peg_generator) directory. +[test_peg_generator](../Lib/test/test_peg_generator) directory. Debugging generated parsers @@ -816,15 +815,15 @@ For this reason it is a good idea to experiment first by generating a Python parser. 
To do this, you can go to the [Tools/peg_generator](../Tools/peg_generator) directory on the CPython repository and manually call the parser generator by executing: -``` - $ python -m pegen python +```shell +$ python -m pegen python ``` This will generate a file called `parse.py` in the same directory that you can use to parse some input: -``` - $ python parse.py file_with_source_code_to_test.py +```shell +$ python parse.py file_with_source_code_to_test.py ``` As the generated `parse.py` file is just Python code, you can modify it @@ -848,8 +847,8 @@ can be a bit hard to understand at first. To activate verbose mode you can add the `-d` flag when executing Python: -``` - $ python -d file_to_test.py +```shell +$ python -d file_to_test.py ``` This will print **a lot** of output to `stderr` so it is probably better to dump @@ -857,7 +856,7 @@ it to a file for further analysis. The output consists of trace lines with the following structure:: ``` - ('>'|'-'|'+'|'!') []: ... + ('>'|'-'|'+'|'!') []: ... ``` Every line is indented by a different amount (``) depending on how diff --git a/InternalDocs/string_interning.md b/InternalDocs/string_interning.md index e0d20632516142..26a5197c6e70f3 100644 --- a/InternalDocs/string_interning.md +++ b/InternalDocs/string_interning.md @@ -2,6 +2,7 @@ *Interned* strings are conceptually part of an interpreter-global *set* of interned strings, meaning that: + - no two interned strings have the same content (across an interpreter); - two interned strings can be safely compared using pointer equality (Python `is`). @@ -61,6 +62,7 @@ if it's interned and mortal it needs extra processing in The converse is not true: interned strings can be mortal. For mortal interned strings: + - the 2 references from the interned dict (key & value) are excluded from their refcount - the deallocator (`unicode_dealloc`) removes the string from the interned dict @@ -90,6 +92,7 @@ modify in place. The functions take ownership of (“steal”) the reference to their argument, and update the argument with a *new* reference. This means: + - They're “reference neutral”. - They must not be called with a borrowed reference. diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 996887cb713942..710aba9685efda 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -963,7 +963,7 @@ def answer_challenge(connection, authkey: bytes): f'Protocol error, expected challenge: {message=}') message = message[len(_CHALLENGE):] if len(message) < _MD5ONLY_MESSAGE_LENGTH: - raise AuthenticationError('challenge too short: {len(message)} bytes') + raise AuthenticationError(f'challenge too short: {len(message)} bytes') digest = _create_response(authkey, message) connection.send_bytes(digest) response = connection.recv_bytes(256) # reject large message diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2c243d470d4eda..2b314b6c9a16bf 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -438,14 +438,6 @@ def stat(self, *, follow_symlinks=True): """ raise UnsupportedOperation(self._unsupported_msg('stat()')) - def lstat(self): - """ - Like stat(), except if the path points to a symlink, the symlink's - status information is returned, rather than its target's. - """ - return self.stat(follow_symlinks=False) - - # Convenience functions for querying the stat results def exists(self, *, follow_symlinks=True): @@ -505,7 +497,7 @@ def is_symlink(self): Whether this path is a symbolic link. 
""" try: - return S_ISLNK(self.lstat().st_mode) + return S_ISLNK(self.stat(follow_symlinks=False).st_mode) except (OSError, ValueError): return False @@ -743,27 +735,12 @@ def absolute(self): # Treat the root directory as the current working directory. return self.with_segments('/', *self._raw_paths) - @classmethod - def cwd(cls): - """Return a new path pointing to the current working directory.""" - # We call 'absolute()' rather than using 'os.getcwd()' directly to - # enable users to replace the implementation of 'absolute()' in a - # subclass and benefit from the new behaviour here. This works because - # os.path.abspath('.') == os.getcwd(). - return cls().absolute() - def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ raise UnsupportedOperation(self._unsupported_msg('expanduser()')) - @classmethod - def home(cls): - """Return a new path pointing to expanduser('~'). - """ - return cls("~").expanduser() - def readlink(self): """ Return the path to which the symbolic link points. @@ -789,7 +766,7 @@ def raise_error(*args): def lstat(path_str): path = self.with_segments(path_str) path._resolving = True - return path.lstat() + return path.stat(follow_symlinks=False) def readlink(path_str): path = self.with_segments(path_str) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index b27f456d375225..b5d9dc49f58463 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -542,6 +542,13 @@ def stat(self, *, follow_symlinks=True): """ return os.stat(self, follow_symlinks=follow_symlinks) + def lstat(self): + """ + Like stat(), except if the path points to a symlink, the symlink's + status information is returned, rather than its target's. + """ + return os.lstat(self) + def exists(self, *, follow_symlinks=True): """ Whether this path exists. @@ -719,6 +726,14 @@ def absolute(self): tail.extend(self._tail) return self._from_parsed_parts(drive, root, tail) + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory.""" + cwd = os.getcwd() + path = cls(cwd) + path._str = cwd # getcwd() returns a normalized path + return path + def resolve(self, strict=False): """ Make the path absolute, resolving all symlinks on the way and also @@ -900,6 +915,15 @@ def expanduser(self): return self + @classmethod + def home(cls): + """Return a new path pointing to expanduser('~'). 
+ """ + homedir = os.path.expanduser("~") + if homedir == "~": + raise RuntimeError("Could not determine home directory.") + return cls(homedir) + @classmethod def from_uri(cls, uri): """Return a new path from the given 'file' URI.""" diff --git a/Lib/pdb.py b/Lib/pdb.py index b7f6fd4323407e..10d1923cdad2d6 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -438,6 +438,13 @@ def user_line(self, frame): if (self.mainpyfile != self.canonic(frame.f_code.co_filename)): return self._wait_for_mainpyfile = False + if self.trace_opcodes: + # GH-127321 + # We want to avoid stopping at an opcode that does not have + # an associated line number because pdb does not like it + if frame.f_lineno is None: + self.set_stepinstr() + return self.bp_commands(frame) self.interaction(frame, None) diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index ee52700b51fd07..ad86609016e478 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -318,14 +318,22 @@ def get_default_scheme(): def get_makefile_filename(): """Return the path of the Makefile.""" + + # GH-127429: When cross-compiling, use the Makefile from the target, instead of the host Python. + if cross_base := os.environ.get('_PYTHON_PROJECT_BASE'): + return os.path.join(cross_base, 'Makefile') + if _PYTHON_BUILD: return os.path.join(_PROJECT_BASE, "Makefile") + if hasattr(sys, 'abiflags'): config_dir_name = f'config-{_PY_VERSION_SHORT}{sys.abiflags}' else: config_dir_name = 'config' + if hasattr(sys.implementation, '_multiarch'): config_dir_name += f'-{sys.implementation._multiarch}' + return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') @@ -464,27 +472,44 @@ def get_path(name, scheme=get_default_scheme(), vars=None, expand=True): def _init_config_vars(): global _CONFIG_VARS _CONFIG_VARS = {} + + prefix = _PREFIX + exec_prefix = _EXEC_PREFIX + base_prefix = _BASE_PREFIX + base_exec_prefix = _BASE_EXEC_PREFIX + + try: + abiflags = sys.abiflags + except AttributeError: + abiflags = '' + + if os.name == 'posix': + _init_posix(_CONFIG_VARS) + # If we are cross-compiling, load the prefixes from the Makefile instead. + if '_PYTHON_PROJECT_BASE' in os.environ: + prefix = _CONFIG_VARS['prefix'] + exec_prefix = _CONFIG_VARS['exec_prefix'] + base_prefix = _CONFIG_VARS['prefix'] + base_exec_prefix = _CONFIG_VARS['exec_prefix'] + abiflags = _CONFIG_VARS['ABIFLAGS'] + # Normalized versions of prefix and exec_prefix are handy to have; # in fact, these are the standard versions used most places in the # Distutils. - _CONFIG_VARS['prefix'] = _PREFIX - _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX + _CONFIG_VARS['prefix'] = prefix + _CONFIG_VARS['exec_prefix'] = exec_prefix _CONFIG_VARS['py_version'] = _PY_VERSION _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT _CONFIG_VARS['py_version_nodot'] = _PY_VERSION_SHORT_NO_DOT - _CONFIG_VARS['installed_base'] = _BASE_PREFIX - _CONFIG_VARS['base'] = _PREFIX - _CONFIG_VARS['installed_platbase'] = _BASE_EXEC_PREFIX - _CONFIG_VARS['platbase'] = _EXEC_PREFIX + _CONFIG_VARS['installed_base'] = base_prefix + _CONFIG_VARS['base'] = prefix + _CONFIG_VARS['installed_platbase'] = base_exec_prefix + _CONFIG_VARS['platbase'] = exec_prefix _CONFIG_VARS['projectbase'] = _PROJECT_BASE _CONFIG_VARS['platlibdir'] = sys.platlibdir _CONFIG_VARS['implementation'] = _get_implementation() _CONFIG_VARS['implementation_lower'] = _get_implementation().lower() - try: - _CONFIG_VARS['abiflags'] = sys.abiflags - except AttributeError: - # sys.abiflags may not be defined on all platforms. 
- _CONFIG_VARS['abiflags'] = '' + _CONFIG_VARS['abiflags'] = abiflags try: _CONFIG_VARS['py_version_nodot_plat'] = sys.winver.replace('.', '') except AttributeError: @@ -493,8 +518,6 @@ def _init_config_vars(): if os.name == 'nt': _init_non_posix(_CONFIG_VARS) _CONFIG_VARS['VPATH'] = sys._vpath - if os.name == 'posix': - _init_posix(_CONFIG_VARS) if _HAS_USER_BASE: # Setting 'userbase' is done below the call to the # init function to enable using 'get_config_var' in diff --git a/Lib/sysconfig/__main__.py b/Lib/sysconfig/__main__.py index 5660a6c5105b9f..10728c709e1811 100644 --- a/Lib/sysconfig/__main__.py +++ b/Lib/sysconfig/__main__.py @@ -7,6 +7,7 @@ _PYTHON_BUILD, _get_sysconfigdata_name, get_config_h_filename, + get_config_var, get_config_vars, get_default_scheme, get_makefile_filename, @@ -161,7 +162,7 @@ def _print_config_dict(d, stream): def _get_pybuilddir(): pybuilddir = f'build/lib.{get_platform()}-{get_python_version()}' - if hasattr(sys, "gettotalrefcount"): + if get_config_var('Py_DEBUG') == '1': pybuilddir += '-pydebug' return pybuilddir @@ -229,11 +230,15 @@ def _generate_posix_vars(): f.write('build_time_vars = ') _print_config_dict(vars, stream=f) + print(f'Written {destfile}') + # Write a JSON file with the output of sysconfig.get_config_vars jsonfile = os.path.join(pybuilddir, _get_json_data_name()) with open(jsonfile, 'w') as f: json.dump(get_config_vars(), f, indent=2) + print(f'Written {jsonfile}') + # Create file used for sys.path fixup -- see Modules/getpath.c with open('pybuilddir.txt', 'w', encoding='utf8') as f: f.write(pybuilddir) diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index e783475cc7a36b..d0d1c8cdc9a11b 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -123,9 +123,9 @@ def get_pooled_int(value): xml_filename = 'refleak-xml.tmp' result = None dash_R_cleanup(fs, ps, pic, zdc, abcs) - support.gc_collect() for i in rep_range: + support.gc_collect() current = refleak_helper._hunting_for_refleaks refleak_helper._hunting_for_refleaks = True try: diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index f26411ace8fa73..55890e58ed4bae 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1260,27 +1260,6 @@ def test_super_instructions(self): got = self.get_disassembly(load_test, adaptive=True) self.do_disassembly_compare(got, dis_load_test_quickened_code) - @cpython_only - @requires_specialization - def test_binary_subscr_specialize(self): - binary_subscr_quicken = """\ - 0 RESUME_CHECK 0 - - 1 LOAD_NAME 0 (a) - LOAD_SMALL_INT 0 - %s - RETURN_VALUE -""" - co_list = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_list, {}, {'a': [0]})) - got = self.get_disassembly(co_list, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_LIST_INT") - - co_dict = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_dict, {}, {'a': {0: '1'}})) - got = self.get_disassembly(co_dict, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_DICT") - @cpython_only @requires_specialization def test_load_attr_specialize(self): diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 51463b6bb8c1b4..53f6d778bbecbc 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -124,6 +124,21 @@ def work(): for thread in threads: thread.join() + def test_object_class_change(self): + class Base: + def __init__(self): + 
self.attr = 123 + class ClassA(Base): + pass + class ClassB(Base): + pass + + obj = ClassA() + # keep reference to __dict__ + d = obj.__dict__ + obj.__class__ = ClassB + + def run_one(self, writer_func, reader_func): writer = Thread(target=writer_func) readers = [] diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 0372815b9bfd27..b5140057a69d36 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -31,6 +31,11 @@ def __new__(cls, *args, **kwargs): return C ContainerNoGC = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None + ### Support code ############################################################################### @@ -1130,6 +1135,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @requires_gil_enabled("Free threading does not support incremental GC") # Use small increments to emulate longer running process in a shorter time @gc_threshold(200, 10) @@ -1167,20 +1173,15 @@ def make_ll(depth): enabled = gc.isenabled() gc.enable() olds = [] + initial_heap_size = _testinternalcapi.get_tracked_heap_size() for i in range(20_000): newhead = make_ll(20) count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: - stats = gc.get_stats() - young = stats[0] - incremental = stats[1] - old = stats[2] - collected = young['collected'] + incremental['collected'] + old['collected'] - count += CORRECTION - live = count - collected - self.assertLess(live, 25000) + new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size + self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() @@ -1322,7 +1323,8 @@ def test_refcount_errors(self): from test.support import gc_collect, SuppressCrashReport a = [1, 2, 3] - b = [a] + b = [a, a] + a.append(b) # Avoid coredump when Py_FatalError() calls abort() SuppressCrashReport().__enter__() @@ -1332,6 +1334,8 @@ def test_refcount_errors(self): # (to avoid deallocating it): import ctypes ctypes.pythonapi.Py_DecRef(ctypes.py_object(a)) + del a + del b # The garbage collector should now have a fatal error # when it reaches the broken object @@ -1360,7 +1364,7 @@ def test_refcount_errors(self): self.assertRegex(stderr, br'object type name: list') self.assertRegex(stderr, - br'object repr : \[1, 2, 3\]') + br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]') class GCTogglingTests(unittest.TestCase): diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 0e39998ebb3783..c52b7f3e09bea1 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -1133,6 +1133,19 @@ def test_script_shadowing_stdlib_sys_path_modification(self): stdout, stderr = popen.communicate() self.assertRegex(stdout, expected_error) + def test_create_dynamic_null(self): + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec: + name = "a\x00b" + origin = "abc" + _imp.create_dynamic(Spec()) + + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec2: + name = "abc" + origin = "a\x00b" + _imp.create_dynamic(Spec2()) + @skip_if_dont_write_bytecode class FilePermissionTests(unittest.TestCase): diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index a9befbba64daa0..01856d9bf67657 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -1649,6 +1649,10 @@ def 
test_set___main___attrs(self): self.assertIs(after2, None) self.assertEqual(after3.type.__name__, 'AssertionError') + with self.assertRaises(ValueError): + # GH-127165: Embedded NULL characters broke the lookup + _interpreters.set___main___attrs(interpid, {"\x00": 1}) + with self.subTest('from C-API'): with self.interpreter_from_capi() as interpid: with self.assertRaisesRegex(InterpreterError, 'unrecognized'): diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 1a6eac236009c3..b989b21cd9b3a9 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -617,7 +617,7 @@ def write(items): opname = "BINARY_SUBSCR_GETITEM" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_binary_subscr_list_int(self): def get_items(): items = [] @@ -1023,7 +1023,7 @@ def write(items): opname = "STORE_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_store_subscr_list_int(self): def get_items(): items = [] @@ -1229,48 +1229,48 @@ class TestSpecializer(TestBase): @cpython_only @requires_specialization_ft def test_binary_op(self): - def f(): + def binary_op_add_int(): for _ in range(100): a, b = 1, 2 c = a + b self.assertEqual(c, 3) - f() - self.assert_specialized(f, "BINARY_OP_ADD_INT") - self.assert_no_opcode(f, "BINARY_OP") + binary_op_add_int() + self.assert_specialized(binary_op_add_int, "BINARY_OP_ADD_INT") + self.assert_no_opcode(binary_op_add_int, "BINARY_OP") - def g(): + def binary_op_add_unicode(): for _ in range(100): a, b = "foo", "bar" c = a + b self.assertEqual(c, "foobar") - g() - self.assert_specialized(g, "BINARY_OP_ADD_UNICODE") - self.assert_no_opcode(g, "BINARY_OP") + binary_op_add_unicode() + self.assert_specialized(binary_op_add_unicode, "BINARY_OP_ADD_UNICODE") + self.assert_no_opcode(binary_op_add_unicode, "BINARY_OP") @cpython_only @requires_specialization_ft def test_contain_op(self): - def f(): + def contains_op_dict(): for _ in range(100): a, b = 1, {1: 2, 2: 5} self.assertTrue(a in b) self.assertFalse(3 in b) - f() - self.assert_specialized(f, "CONTAINS_OP_DICT") - self.assert_no_opcode(f, "CONTAINS_OP") + contains_op_dict() + self.assert_specialized(contains_op_dict, "CONTAINS_OP_DICT") + self.assert_no_opcode(contains_op_dict, "CONTAINS_OP") - def g(): + def contains_op_set(): for _ in range(100): a, b = 1, {1, 2} self.assertTrue(a in b) self.assertFalse(3 in b) - g() - self.assert_specialized(g, "CONTAINS_OP_SET") - self.assert_no_opcode(g, "CONTAINS_OP") + contains_op_set() + self.assert_specialized(contains_op_set, "CONTAINS_OP_SET") + self.assert_no_opcode(contains_op_set, "CONTAINS_OP") @cpython_only @requires_specialization_ft @@ -1342,34 +1342,81 @@ def to_bool_str(): @cpython_only @requires_specialization_ft def test_unpack_sequence(self): - def f(): + def unpack_sequence_two_tuple(): for _ in range(100): a, b = 1, 2 self.assertEqual(a, 1) self.assertEqual(b, 2) - f() - self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE") - self.assert_no_opcode(f, "UNPACK_SEQUENCE") + unpack_sequence_two_tuple() + self.assert_specialized(unpack_sequence_two_tuple, + "UNPACK_SEQUENCE_TWO_TUPLE") + self.assert_no_opcode(unpack_sequence_two_tuple, "UNPACK_SEQUENCE") - def g(): + def unpack_sequence_tuple(): for _ in range(100): a, = 1, self.assertEqual(a, 1) - g() - self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE") - self.assert_no_opcode(g, "UNPACK_SEQUENCE") + unpack_sequence_tuple() + 
self.assert_specialized(unpack_sequence_tuple, "UNPACK_SEQUENCE_TUPLE") + self.assert_no_opcode(unpack_sequence_tuple, "UNPACK_SEQUENCE") - def x(): + def unpack_sequence_list(): for _ in range(100): a, b = [1, 2] self.assertEqual(a, 1) self.assertEqual(b, 2) - x() - self.assert_specialized(x, "UNPACK_SEQUENCE_LIST") - self.assert_no_opcode(x, "UNPACK_SEQUENCE") + unpack_sequence_list() + self.assert_specialized(unpack_sequence_list, "UNPACK_SEQUENCE_LIST") + self.assert_no_opcode(unpack_sequence_list, "UNPACK_SEQUENCE") + + @cpython_only + @requires_specialization_ft + def test_binary_subscr(self): + def binary_subscr_list_int(): + for _ in range(100): + a = [1, 2, 3] + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_list_int() + self.assert_specialized(binary_subscr_list_int, + "BINARY_SUBSCR_LIST_INT") + self.assert_no_opcode(binary_subscr_list_int, "BINARY_SUBSCR") + + def binary_subscr_tuple_int(): + for _ in range(100): + a = (1, 2, 3) + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_tuple_int() + self.assert_specialized(binary_subscr_tuple_int, + "BINARY_SUBSCR_TUPLE_INT") + self.assert_no_opcode(binary_subscr_tuple_int, "BINARY_SUBSCR") + + def binary_subscr_dict(): + for _ in range(100): + a = {1: 2, 2: 3} + self.assertEqual(a[1], 2) + self.assertEqual(a[2], 3) + + binary_subscr_dict() + self.assert_specialized(binary_subscr_dict, "BINARY_SUBSCR_DICT") + self.assert_no_opcode(binary_subscr_dict, "BINARY_SUBSCR") + + def binary_subscr_str_int(): + for _ in range(100): + a = "foobar" + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_str_int() + self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT") + self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 6a994f890da616..2c48eeeda145d0 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -546,12 +546,9 @@ def tempdir(self): self.addCleanup(os_helper.rmtree, d) return d - def test_matches_pathbase_api(self): - our_names = {name for name in dir(self.cls) if name[0] != '_'} - our_names.remove('is_reserved') # only present in PurePath + def test_matches_pathbase_docstrings(self): path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'} - self.assertEqual(our_names, path_names) - for attr_name in our_names: + for attr_name in path_names: if attr_name == 'parser': # On Windows, Path.parser is ntpath, but PathBase.parser is # posixpath, and so their docstrings differ. 
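The TestSpecializer cases above (renamed and extended in this patch) all follow the same pattern: run a small function enough times for the adaptive interpreter to warm up, then inspect the quickened bytecode. A rough standalone illustration of that idea, using only the public dis module; the helper below is invented for this note and assumes a CPython build with specialization enabled:

    import dis

    def check_specialized(func, expected_opname, warmup=100):
        # Warm up the adaptive counters so the interpreter can specialize.
        for _ in range(warmup):
            func()
        # adaptive=True disassembles the quickened (possibly specialized) code.
        names = {ins.opname for ins in dis.get_instructions(func, adaptive=True)}
        return expected_opname in names

    def add_ints():
        a, b = 1, 2
        return a + b

    print(check_specialized(add_ints, "BINARY_OP_ADD_INT"))  # True on such builds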
@@ -1357,6 +1354,17 @@ def test_symlink_to_unsupported(self):
             with self.assertRaises(pathlib.UnsupportedOperation):
                 q.symlink_to(p)
 
+    @needs_symlinks
+    def test_lstat(self):
+        p = self.cls(self.base)/ 'linkA'
+        st = p.stat()
+        self.assertNotEqual(st, p.lstat())
+
+    def test_lstat_nosymlink(self):
+        p = self.cls(self.base) / 'fileA'
+        st = p.stat()
+        self.assertEqual(st, p.lstat())
+
     def test_is_junction(self):
         P = self.cls(self.base)
 
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index aaa30a17f2af14..af94ac039808f0 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -1351,7 +1351,6 @@ def test_unsupported_operation(self):
         p = self.cls('')
         e = UnsupportedOperation
         self.assertRaises(e, p.stat)
-        self.assertRaises(e, p.lstat)
         self.assertRaises(e, p.exists)
         self.assertRaises(e, p.samefile, 'foo')
         self.assertRaises(e, p.is_dir)
@@ -1372,9 +1371,7 @@ def test_unsupported_operation(self):
         self.assertRaises(e, p.rglob, '*')
         self.assertRaises(e, lambda: list(p.walk()))
         self.assertRaises(e, p.absolute)
-        self.assertRaises(e, P.cwd)
         self.assertRaises(e, p.expanduser)
-        self.assertRaises(e, p.home)
         self.assertRaises(e, p.readlink)
         self.assertRaises(e, p.symlink_to, 'foo')
         self.assertRaises(e, p.hardlink_to, 'foo')
@@ -2671,17 +2668,6 @@ def test_stat_no_follow_symlinks_nosymlink(self):
         st = p.stat()
         self.assertEqual(st, p.stat(follow_symlinks=False))
 
-    @needs_symlinks
-    def test_lstat(self):
-        p = self.cls(self.base)/ 'linkA'
-        st = p.stat()
-        self.assertNotEqual(st, p.lstat())
-
-    def test_lstat_nosymlink(self):
-        p = self.cls(self.base) / 'fileA'
-        st = p.stat()
-        self.assertEqual(st, p.lstat())
-
     def test_is_dir(self):
         P = self.cls(self.base)
         self.assertTrue((P / 'dirA').is_dir())
@@ -2868,11 +2854,13 @@ def test_delete_dir(self):
         base = self.cls(self.base)
         base.joinpath('dirA')._delete()
         self.assertRaises(FileNotFoundError, base.joinpath('dirA').stat)
-        self.assertRaises(FileNotFoundError, base.joinpath('dirA', 'linkC').lstat)
+        self.assertRaises(FileNotFoundError, base.joinpath('dirA', 'linkC').stat,
+                          follow_symlinks=False)
         base.joinpath('dirB')._delete()
         self.assertRaises(FileNotFoundError, base.joinpath('dirB').stat)
         self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'fileB').stat)
-        self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'linkD').lstat)
+        self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'linkD').stat,
+                          follow_symlinks=False)
         base.joinpath('dirC')._delete()
         self.assertRaises(FileNotFoundError, base.joinpath('dirC').stat)
         self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'dirD').stat)
diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py
index e5f9848319021a..48a4c568651879 100644
--- a/Lib/test/test_pdb.py
+++ b/Lib/test/test_pdb.py
@@ -2931,6 +2931,22 @@ def test_pdb_issue_gh_108976():
     (Pdb) continue
     """
 
+def test_pdb_issue_gh_127321():
+    """See GH-127321
+    breakpoint() should stop at an opcode that has a line number
+    >>> def test_function():
+    ...     import pdb; pdb_instance = pdb.Pdb(nosigint=True, readrc=False)
+    ...     [1, 2] and pdb_instance.set_trace()
+    ...     a = 1
+    >>> with PdbTestInput([ # doctest: +NORMALIZE_WHITESPACE
+    ...     'continue'
+    ... ]):
+    ...
test_function() + > (4)test_function() + -> a = 1 + (Pdb) continue + """ + def test_pdb_issue_gh_80731(): """See GH-80731 diff --git a/Lib/test/test_sqlite3/test_regression.py b/Lib/test/test_sqlite3/test_regression.py index db4e13222da9da..50cced3891d13a 100644 --- a/Lib/test/test_sqlite3/test_regression.py +++ b/Lib/test/test_sqlite3/test_regression.py @@ -433,6 +433,7 @@ def test_table_lock_cursor_dealloc(self): con.commit() cur = con.execute("select t from t") del cur + support.gc_collect() con.execute("drop table t") con.commit() @@ -448,6 +449,7 @@ def dup(v): con.create_function("dup", 1, dup) cur = con.execute("select dup(t) from t") del cur + support.gc_collect() con.execute("drop table t") con.commit() diff --git a/Lib/test/test_tkinter/test_misc.py b/Lib/test/test_tkinter/test_misc.py index 579ce2af9fa0bf..475edcbd5338a7 100644 --- a/Lib/test/test_tkinter/test_misc.py +++ b/Lib/test/test_tkinter/test_misc.py @@ -123,9 +123,9 @@ def test_tk_setPalette(self): def test_after(self): root = self.root - def callback(start=0, step=1): + def callback(start=0, step=1, *, end=0): nonlocal count - count = start + step + count = start + step + end # Without function, sleeps for ms. self.assertIsNone(root.after(1)) @@ -161,12 +161,18 @@ def callback(start=0, step=1): root.update() # Process all pending events. self.assertEqual(count, 53) + # Set up with callback with keyword args. + count = 0 + timer1 = root.after(0, callback, 42, step=11, end=1) + root.update() # Process all pending events. + self.assertEqual(count, 54) + def test_after_idle(self): root = self.root - def callback(start=0, step=1): + def callback(start=0, step=1, *, end=0): nonlocal count - count = start + step + count = start + step + end # Set up with callback with no args. count = 0 @@ -193,6 +199,12 @@ def callback(start=0, step=1): with self.assertRaises(tkinter.TclError): root.tk.call(script) + # Set up with callback with keyword args. + count = 0 + idle1 = root.after_idle(callback, 42, step=11, end=1) + root.update() # Process all pending events. + self.assertEqual(count, 54) + def test_after_cancel(self): root = self.root diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py index dd7b3e138f4236..bfec04bb6c1e6e 100644 --- a/Lib/tkinter/__init__.py +++ b/Lib/tkinter/__init__.py @@ -847,7 +847,7 @@ def tk_focusPrev(self): if not name: return None return self._nametowidget(name) - def after(self, ms, func=None, *args): + def after(self, ms, func=None, *args, **kw): """Call function once after given time. MS specifies the time in milliseconds. FUNC gives the @@ -861,7 +861,7 @@ def after(self, ms, func=None, *args): else: def callit(): try: - func(*args) + func(*args, **kw) finally: try: self.deletecommand(name) @@ -875,13 +875,13 @@ def callit(): name = self._register(callit) return self.tk.call('after', ms, name) - def after_idle(self, func, *args): + def after_idle(self, func, *args, **kw): """Call FUNC once if the Tcl main loop has no event to process. Return an identifier to cancel the scheduling with after_cancel.""" - return self.after('idle', func, *args) + return self.after('idle', func, *args, **kw) def after_cancel(self, id): """Cancel scheduling of function identified with ID. 
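As a usage sketch of the keyword-argument forwarding added to after() and after_idle() above (illustrative only; it needs a running Tk display, and the callback is made up for the example):

    import tkinter as tk

    root = tk.Tk()

    def callback(start=0, step=1, *, end=0):
        print(start + step + end)

    # Positional arguments worked before; keyword arguments are now forwarded too.
    root.after(100, callback, 42, step=11, end=1)   # prints 54 after ~100 ms
    root.after_idle(callback, 1, end=2)             # prints 3 once the loop is idle
    root.after(200, root.destroy)
    root.mainloop()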
diff --git a/Misc/ACKS b/Misc/ACKS index cd34846574b304..fc4b83a0e2b823 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -2079,6 +2079,7 @@ Arnon Yaari Alakshendra Yadav Hirokazu Yamamoto Masayuki Yamamoto +Zhikang Yan Jingchen Ye Ka-Ping Yee Chi Hsuan Yen diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst new file mode 100644 index 00000000000000..9ef2b8dc33ed0f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst @@ -0,0 +1,4 @@ +Add a marking phase to the GC. All objects that can be transitively reached +from builtin modules or the stacks are marked as reachable before cycle +detection. This reduces the amount of work done by the GC by approximately +half. diff --git a/Misc/NEWS.d/next/Library/2024-11-16-10-52-48.gh-issue-126899.GFnfBt.rst b/Misc/NEWS.d/next/Library/2024-11-16-10-52-48.gh-issue-126899.GFnfBt.rst new file mode 100644 index 00000000000000..c1a0ed6438135d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-16-10-52-48.gh-issue-126899.GFnfBt.rst @@ -0,0 +1,2 @@ +Make tkinter widget methods :meth:`!after` and :meth:`!after_idle` accept +arguments passed by keyword. diff --git a/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst b/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst new file mode 100644 index 00000000000000..708c1a6437d812 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst @@ -0,0 +1,3 @@ +Fixed bug where, on cross-builds, the :mod:`sysconfig` POSIX data was being +generated with the host Python's ``Makefile``. The data is now generated from +current build's ``Makefile``. diff --git a/Misc/NEWS.d/next/Library/2024-11-30-21-46-15.gh-issue-127321.M78fBv.rst b/Misc/NEWS.d/next/Library/2024-11-30-21-46-15.gh-issue-127321.M78fBv.rst new file mode 100644 index 00000000000000..69b6ce68a47509 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-30-21-46-15.gh-issue-127321.M78fBv.rst @@ -0,0 +1 @@ +:func:`pdb.set_trace` will not stop at an opcode that does not have an associated line number anymore. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index a925191d479bd6..1bb71a3e80b39d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2076,6 +2076,11 @@ has_deferred_refcount(PyObject *self, PyObject *op) return PyBool_FromLong(_PyObject_HasDeferredRefcount(op)); } +static PyObject * +get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); +} static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, @@ -2174,6 +2179,7 @@ static PyMethodDef module_functions[] = { {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS}, {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, {"has_deferred_refcount", has_deferred_refcount, METH_O}, + {"get_tracked_heap_size", get_tracked_heap_size, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 49b213eaa817e2..a13d8084d14d66 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7300,7 +7300,7 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) // We could be called with an unlocked dict when the caller knows the // values are already detached, so we assert after inline values check. 
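    /* ASSERT_WORLD_STOPPED_OR_OBJ_LOCKED accepts either a caller that holds
       mp's per-object lock or one running while every other thread is stopped,
       instead of requiring the critical section to be held unconditionally. */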
- _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); + ASSERT_WORLD_STOPPED_OR_OBJ_LOCKED(mp); assert(mp->ma_values->embedded == 1); assert(mp->ma_values->valid == 1); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES); diff --git a/Objects/listobject.c b/Objects/listobject.c index bb0040cbe9f272..4b24f4a428e18b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -391,6 +391,12 @@ PyList_GetItemRef(PyObject *op, Py_ssize_t i) return item; } +PyObject * +_PyList_GetItemRef(PyListObject *list, Py_ssize_t i) +{ + return list_get_item_ref(list, i); +} + int PyList_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ab4f07ed054385..463da06445984b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5014,21 +5014,26 @@ ctz(size_t v) #endif /* SIZEOF_SIZE_T */ return pos; } +#else +#define HAVE_CTZ 0 #endif -#if HAVE_CTZ -// load p[0]..p[size-1] as a little-endian size_t -// without unaligned access nor read ahead. +#if HAVE_CTZ && PY_LITTLE_ENDIAN +// load p[0]..p[size-1] as a size_t without unaligned access nor read ahead. static size_t load_unaligned(const unsigned char *p, size_t size) { - assert(size <= SIZEOF_SIZE_T); union { size_t s; unsigned char b[SIZEOF_SIZE_T]; } u; u.s = 0; + // This switch statement assumes little endian because: + // * union is faster than bitwise or and shift. + // * big endian machine is rare and hard to maintain. switch (size) { + default: +#if SIZEOF_SIZE_T == 8 case 8: u.b[7] = p[7]; _Py_FALLTHROUGH; @@ -5041,6 +5046,7 @@ load_unaligned(const unsigned char *p, size_t size) case 5: u.b[4] = p[4]; _Py_FALLTHROUGH; +#endif case 4: u.b[3] = p[3]; _Py_FALLTHROUGH; @@ -5055,8 +5061,6 @@ load_unaligned(const unsigned char *p, size_t size) break; case 0: break; - default: - Py_UNREACHABLE(); } return u.s; } @@ -5077,8 +5081,8 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) if (end - start >= SIZEOF_SIZE_T) { const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); +#if PY_LITTLE_ENDIAN && HAVE_CTZ if (p < p2) { -#if HAVE_CTZ #if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) // x86 and amd64 are little endian and can load unaligned memory. size_t u = *(const size_t*)p & ASCII_CHAR_MASK; @@ -5086,11 +5090,11 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK; #endif if (u) { - return p - start + (ctz(u) - 7) / 8; + return (ctz(u) - 7) / 8; } p = p2; } -#else +#else /* PY_LITTLE_ENDIAN && HAVE_CTZ */ while (p < p2) { if (*p & 0x80) { return p - start; @@ -5113,7 +5117,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) p += SIZEOF_SIZE_T; } } -#if HAVE_CTZ +#if PY_LITTLE_ENDIAN && HAVE_CTZ // we can not use *(const size_t*)p to avoid buffer overrun. 
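    // load_unaligned() assembles the remaining end - p bytes (at most
    // SIZEOF_SIZE_T) into a size_t one byte at a time, so the ASCII_CHAR_MASK
    // test below covers the final partial word without reading past 'end'.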
size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK; if (u) { @@ -16154,7 +16158,7 @@ encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) int res; res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT); if (res == -2) { - PyErr_Format(PyExc_RuntimeWarning, "cannot encode %s", name); + PyErr_Format(PyExc_RuntimeError, "cannot encode %s", name); return -1; } if (res < 0) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a14b32b8108be8..c07ec42ec68f8b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -704,7 +704,7 @@ dummy_func( }; specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -713,7 +713,7 @@ dummy_func( } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_BINARY_SUBSCR, (container, sub -- res)) { @@ -790,11 +790,17 @@ dummy_func( // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub)); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; +#ifdef Py_GIL_DISABLED + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + DEOPT_IF(res_o == NULL); + STAT_INC(BINARY_SUBSCR, hit); +#else DEOPT_IF(index >= PyList_GET_SIZE(list)); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); +#endif PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); diff --git a/Python/ceval.c b/Python/ceval.c index eba0f233a81ef3..f9514a6bf25c1b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -818,6 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; entry_frame.stackpointer = entry_frame.localsplus; entry_frame.owner = FRAME_OWNED_BY_CSTACK; + entry_frame.visited = 0; entry_frame.return_offset = 0; /* Push frame */ entry_frame.previous = tstate->current_frame; diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 7aaa045f375cf0..0a106ad636bfe8 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -342,6 +342,11 @@ _copy_string_obj_raw(PyObject *strobj, Py_ssize_t *p_size) return NULL; } + if (size != (Py_ssize_t)strlen(str)) { + PyErr_SetString(PyExc_ValueError, "found embedded NULL character"); + return NULL; + } + char *copied = PyMem_RawMalloc(size+1); if (copied == NULL) { PyErr_NoMemory(); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d46412a193332b..c91257b06cad11 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -981,6 +981,16 @@ JUMP_TO_JUMP_TARGET(); } Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_SUBSCR, hit); + #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -989,6 +999,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif 
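    /* In the Py_GIL_DISABLED branch above, _PyList_GetItemRef() returns a new
       (strong) reference, or NULL if the item cannot be retrieved (typically an
       out-of-range index), so the NULL check subsumes the explicit bounds check
       used in the default build. */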
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/gc.c b/Python/gc.c index 63adecf0e05114..5b9588c8741b97 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -106,7 +106,7 @@ gc_old_space(PyGC_Head *g) } static inline int -flip_old_space(int space) +other_space(int space) { assert(space == 0 || space == 1); return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1; @@ -430,24 +430,32 @@ validate_list(PyGC_Head *head, enum flagstates flags) #endif #ifdef GC_EXTRA_DEBUG + + static void -validate_old(GCState *gcstate) +gc_list_validate_space(PyGC_Head *head, int space) { + PyGC_Head *gc = GC_NEXT(head); + while (gc != head) { + assert(gc_old_space(gc) == space); + gc = GC_NEXT(gc); + } +} + +static void +validate_spaces(GCState *gcstate) { + int visited = gcstate->visited_space; + int not_visited = other_space(visited); + gc_list_validate_space(&gcstate->young.head, not_visited); for (int space = 0; space < 2; space++) { - PyGC_Head *head = &gcstate->old[space].head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *next = GC_NEXT(gc); - assert(gc_old_space(gc) == space); - gc = next; - } + gc_list_validate_space(&gcstate->old[space].head, space); } + gc_list_validate_space(&gcstate->permanent_generation.head, visited); } static void validate_consistent_old_space(PyGC_Head *head) { - PyGC_Head *prev = head; PyGC_Head *gc = GC_NEXT(head); if (gc == head) { return; @@ -457,23 +465,13 @@ validate_consistent_old_space(PyGC_Head *head) PyGC_Head *truenext = GC_NEXT(gc); assert(truenext != NULL); assert(gc_old_space(gc) == old_space); - prev = gc; gc = truenext; } - assert(prev == GC_PREV(head)); } -static void -gc_list_validate_space(PyGC_Head *head, int space) { - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - assert(gc_old_space(gc) == space); - gc = GC_NEXT(gc); - } -} #else -#define validate_old(g) do{}while(0) +#define validate_spaces(g) do{}while(0) #define validate_consistent_old_space(l) do{}while(0) #define gc_list_validate_space(l, s) do{}while(0) #endif @@ -494,7 +492,7 @@ update_refs(PyGC_Head *containers) next = GC_NEXT(gc); PyObject *op = FROM_GC(gc); if (_Py_IsImmortal(op)) { - gc_list_move(gc, &get_gc_state()->permanent_generation.head); + _PyObject_GC_UNTRACK(op); gc = next; continue; } @@ -733,13 +731,25 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) unreachable->_gc_next &= _PyGC_PREV_MASK; } +/* In theory, all tuples should be younger than the +* objects they refer to, as tuples are immortal. +* Therefore, untracking tuples in oldest-first order in the +* young generation before promoting them should have tracked +* all the tuples that can be untracked. +* +* Unfortunately, the C API allows tuples to be created +* and then filled in. So this won't untrack all tuples +* that can be untracked. It should untrack most of them +* and is much faster than a more complex approach that +* would untrack all relevant tuples. 
+*/ static void untrack_tuples(PyGC_Head *head) { - PyGC_Head *next, *gc = GC_NEXT(head); + PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); + PyGC_Head *next = GC_NEXT(gc); if (PyTuple_CheckExact(op)) { _PyTuple_MaybeUntrack(op); } @@ -1293,8 +1303,10 @@ gc_collect_young(PyThreadState *tstate, struct gc_collection_stats *stats) { GCState *gcstate = &tstate->interp->gc; + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); GC_STAT_ADD(0, collections, 1); #ifdef Py_STATS { @@ -1308,39 +1320,21 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); + gc_list_set_space(young, gcstate->visited_space); gc_collect_region(tstate, young, &survivors, stats); - Py_ssize_t survivor_count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - survivor_count = gc_list_set_space(&survivors, 1); - } - else { - PyGC_Head *gc; - for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) { -#ifdef GC_DEBUG - assert(gc_old_space(gc) == 0); -#endif - survivor_count++; - } - } - (void)survivor_count; // Silence compiler warning gc_list_merge(&survivors, visited); - validate_old(gcstate); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[gcstate->visited_space].count++; - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); + validate_spaces(gcstate); } #ifndef NDEBUG static inline int IS_IN_VISITED(PyGC_Head *gc, int visited_space) { - assert(visited_space == 0 || flip_old_space(visited_space) == 0); + assert(visited_space == 0 || other_space(visited_space) == 0); return gc_old_space(gc) == visited_space; } #endif @@ -1348,7 +1342,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; - uintptr_t size; + intptr_t size; }; /* A traversal callback for adding to container) */ @@ -1371,7 +1365,7 @@ visit_add_to_container(PyObject *op, void *arg) return 0; } -static uintptr_t +static intptr_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { struct container_and_flag arg = { @@ -1385,6 +1379,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat * have been marked as visited */ assert(IS_IN_VISITED(gc, gcstate->visited_space)); PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); if (_Py_IsImmortal(op)) { PyGC_Head *next = GC_NEXT(gc); gc_list_move(gc, &get_gc_state()->permanent_generation.head); @@ -1402,22 +1397,191 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat /* Do bookkeeping for a completed GC cycle */ static void -completed_cycle(GCState *gcstate) -{ -#ifdef Py_DEBUG - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); -#endif - gcstate->visited_space = flip_old_space(gcstate->visited_space); - /* Make sure all young objects have old space bit set correctly */ - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *gc = GC_NEXT(young); - while (gc != young) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, gcstate->visited_space); - gc = next; +completed_scavenge(GCState *gcstate) +{ + /* We must observe two invariants: + * 1. 
Members of the permanent generation must be marked visited. + * 2. We cannot touch members of the permanent generation. */ + int visited; + if (gc_list_is_empty(&gcstate->permanent_generation.head)) { + /* Permanent generation is empty so we can flip spaces bit */ + int not_visited = gcstate->visited_space; + visited = other_space(not_visited); + gcstate->visited_space = visited; + /* Make sure all objects have visited bit set correctly */ + gc_list_set_space(&gcstate->young.head, not_visited); } + else { + /* We must move the objects from visited to pending space. */ + visited = gcstate->visited_space; + int not_visited = other_space(visited); + assert(gc_list_is_empty(&gcstate->old[not_visited].head)); + gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head); + gc_list_set_space(&gcstate->old[not_visited].head, not_visited); + } + assert(gc_list_is_empty(&gcstate->old[visited].head)); gcstate->work_to_do = 0; + gcstate->phase = GC_PHASE_MARK; +} + +static intptr_t +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; + } + } + return 0; +} + +static intptr_t +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +{ + // Transitively traverse all objects from reachable, until empty + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); + assert(gc_old_space(gc) == visited_space); + gc_list_move(gc, visited); + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + } + gc_list_validate_space(visited, visited_space); + return arg.size; +} + +static intptr_t +mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + // Move all objects on stacks to reachable + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyThreadState* ts = PyInterpreterState_ThreadHead(interp); + HEAD_UNLOCK(runtime); + while (ts) { + _PyInterpreterFrame *frame = ts->current_frame; + while (frame) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + frame = frame->previous; + continue; + } + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } + if (!start && frame->visited) { + // If this frame has already been visited, then the lower frames + // will have already been visited and will not have changed + break; + } + frame->visited = 1; + frame = frame->previous; + } + HEAD_LOCK(runtime); + ts = PyThreadState_Next(ts); + HEAD_UNLOCK(runtime); + 
} + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + struct types_state *types = &interp->types; + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + } + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + } + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_at_start(PyThreadState *tstate) +{ + // TO DO -- Make this incremental + GCState *gcstate = &tstate->interp->gc; + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); + objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + gcstate->work_to_do -= objects_marked; + gcstate->phase = GC_PHASE_COLLECT; + validate_spaces(gcstate); + return objects_marked; +} + +static intptr_t +assess_work_to_do(GCState *gcstate) +{ + /* The amount of work we want to do depends on three things. + * 1. The number of new objects created + * 2. The growth in heap size since the last collection + * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. 
+ */ + intptr_t scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t new_objects = gcstate->young.count; + intptr_t max_heap_fraction = new_objects*3/2; + intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > max_heap_fraction) { + heap_fraction = max_heap_fraction; + } + gcstate->young.count = 0; + return new_objects + heap_fraction; } static void @@ -1425,18 +1589,30 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; + gcstate->work_to_do += assess_work_to_do(gcstate); + untrack_tuples(&gcstate->young.head); + if (gcstate->phase == GC_PHASE_MARK) { + Py_ssize_t objects_marked = mark_at_start(tstate); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + validate_spaces(gcstate); + return; + } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; gc_list_init(&increment); - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } + int scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = 0; + Py_ssize_t increment_size = gc_list_size(&increment); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1444,54 +1620,56 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *gc = _PyGCHead_NEXT(not_visited); gc_list_move(gc, &increment); increment_size++; + assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } + GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, stats); - gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); + completed_scavenge(gcstate); } + validate_spaces(gcstate); } - static void gc_collect_full(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(2, collections, 1); GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); /* merge all generations into visited */ - gc_list_validate_space(young, gcstate->visited_space); - 
gc_list_set_space(pending, gcstate->visited_space); gc_list_merge(young, pending); + gc_list_validate_space(pending, 1-gcstate->visited_space); + gc_list_set_space(pending, gcstate->visited_space); gcstate->young.count = 0; gc_list_merge(pending, visited); + validate_spaces(gcstate); gc_collect_region(tstate, visited, visited, stats); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; - - gcstate->work_to_do = - gcstate->young.threshold * 2; + completed_scavenge(gcstate); _PyGC_ClearAllFreeLists(tstate->interp); - validate_old(gcstate); + validate_spaces(gcstate); add_stats(gcstate, 2, stats); } @@ -1733,20 +1911,23 @@ void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space) { - gc_list_set_space(&gcstate->young.head, 0); - } + /* The permanent_generation must be visited */ + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head; PyGC_Head*old1 = &gcstate->old[1].head; + if (gcstate->visited_space) { + gc_list_set_space(old0, 1); + } + else { + gc_list_set_space(old1, 0); + } gc_list_merge(old0, &gcstate->permanent_generation.head); gcstate->old[0].count = 0; - gc_list_set_space(old1, 0); gc_list_merge(old1, &gcstate->permanent_generation.head); gcstate->old[1].count = 0; - validate_old(gcstate); + validate_spaces(gcstate); } void @@ -1754,8 +1935,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - &gcstate->old[0].head); - validate_old(gcstate); + &gcstate->old[gcstate->visited_space].head); + validate_spaces(gcstate); } Py_ssize_t @@ -1860,7 +2041,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) _Py_stats->object_stats.object_visits = 0; } #endif - validate_old(gcstate); + validate_spaces(gcstate); _Py_atomic_store_int(&gcstate->collecting, 0); return stats.uncollectable + stats.collected; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c9a5132269398c..45bcc4242af9d7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -433,7 +433,7 @@ container = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -444,7 +444,7 @@ } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _BINARY_SUBSCR { @@ -577,11 +577,19 @@ // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(res_o == NULL, BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); + #else DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif PyStackRef_CLOSE_SPECIALIZED(sub_st, 
(destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/import.c b/Python/import.c index 09fe95fa1fb647..b3c384c27718ce 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1157,12 +1157,14 @@ del_extensions_cache_value(struct extensions_cache_value *value) static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { - Py_ssize_t str1_len, str2_len; - const char *str1_data = PyUnicode_AsUTF8AndSize(str1, &str1_len); - const char *str2_data = PyUnicode_AsUTF8AndSize(str2, &str2_len); + const char *str1_data = _PyUnicode_AsUTF8NoNUL(str1); + const char *str2_data = _PyUnicode_AsUTF8NoNUL(str2); if (str1_data == NULL || str2_data == NULL) { return NULL; } + Py_ssize_t str1_len = strlen(str1_data); + Py_ssize_t str2_len = strlen(str2_data); + /* Make sure sep and the NULL byte won't cause an overflow. */ assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; diff --git a/Python/specialize.c b/Python/specialize.c index 172dae7d374602..504eef4f448429 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -231,6 +231,8 @@ print_gc_stats(FILE *out, GCStats *stats) fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); + fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable); + fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable); } } @@ -1717,15 +1719,15 @@ _Py_Specialize_BinarySubscr( PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); PyObject *sub = PyStackRef_AsPyObjectBorrow(sub_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[BINARY_SUBSCR] == INLINE_CACHE_ENTRIES_BINARY_SUBSCR); - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1); PyTypeObject *container_type = Py_TYPE(container); + uint8_t specialized_op; if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_LIST_INT; + specialized_op = BINARY_SUBSCR_LIST_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1738,7 +1740,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyTuple_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_TUPLE_INT; + specialized_op = BINARY_SUBSCR_TUPLE_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1751,7 +1753,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyUnicode_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_STR_INT; + specialized_op = BINARY_SUBSCR_STR_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1762,9 +1764,10 @@ _Py_Specialize_BinarySubscr( goto fail; } if (container_type == &PyDict_Type) { - instr->op.code = BINARY_SUBSCR_DICT; + specialized_op = BINARY_SUBSCR_DICT; goto success; } +#ifndef Py_GIL_DISABLED PyTypeObject *cls = Py_TYPE(container); PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { @@ -1797,21 +1800,17 @@ 
_Py_Specialize_BinarySubscr(
         // struct _specialization_cache):
         ht->_spec_cache.getitem = descriptor;
         ht->_spec_cache.getitem_version = version;
-        instr->op.code = BINARY_SUBSCR_GETITEM;
+        specialized_op = BINARY_SUBSCR_GETITEM;
         goto success;
     }
+#endif // Py_GIL_DISABLED
     SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub));
 fail:
-    STAT_INC(BINARY_SUBSCR, failure);
-    assert(!PyErr_Occurred());
-    instr->op.code = BINARY_SUBSCR;
-    cache->counter = adaptive_counter_backoff(cache->counter);
+    unspecialize(instr);
     return;
 success:
-    STAT_INC(BINARY_SUBSCR, success);
-    assert(!PyErr_Occurred());
-    cache->counter = adaptive_counter_cooldown();
+    specialize(instr, specialized_op);
 }
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 5793e5c649d6b3..bc7ccfe33e777d 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -483,7 +483,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]:
         ): (trace_too_long, attempts),
         Doc(
             "Trace too short",
-            "A potential trace is abandoced because it it too short.",
+            "A potential trace is abandoned because it is too short.",
         ): (trace_too_short, attempts),
         Doc(
             "Inner loop found", "A trace is truncated because it has an inner loop"
@@ -1118,6 +1118,8 @@ def calc_gc_stats(stats: Stats) -> Rows:
             Count(gen["collections"]),
             Count(gen["objects collected"]),
             Count(gen["object visits"]),
+            Count(gen["objects reachable from roots"]),
+            Count(gen["objects not reachable from roots"]),
         )
         for (i, gen) in enumerate(gc_stats)
     ]
@@ -1127,7 +1129,8 @@ def calc_gc_stats(stats: Stats) -> Rows:
         "GC collections and effectiveness",
         [
             Table(
-                ("Generation:", "Collections:", "Objects collected:", "Object visits:"),
+                ("Generation:", "Collections:", "Objects collected:", "Object visits:",
+                 "Reachable from roots:", "Not reachable from roots:"),
                 calc_gc_stats,
             )
         ],
diff --git a/configure b/configure
index 84b74ac3584bcd..4e4043260ed2df 100755
--- a/configure
+++ b/configure
@@ -944,8 +944,8 @@ AR
 LINK_PYTHON_OBJS
 LINK_PYTHON_DEPS
 LIBRARY_DEPS
-NODE
 HOSTRUNNER
+NODE
 STATIC_LIBPYTHON
 GNULD
 EXPORTSFROM
@@ -1147,7 +1147,6 @@ LDFLAGS
 LIBS
 CPPFLAGS
 CPP
-HOSTRUNNER
 PROFILE_TASK
 BOLT_INSTRUMENT_FLAGS
 BOLT_APPLY_FLAGS
@@ -1968,7 +1967,6 @@ Some influential environment variables:
   CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I if you have
               headers in a nonstandard directory
   CPP         C preprocessor
-  HOSTRUNNER  Program to run CPython for the host platform
   PROFILE_TASK
               Python args for PGO generation task
   BOLT_INSTRUMENT_FLAGS
@@ -7622,9 +7620,9 @@ if test "$cross_compiling" = yes; then
     RUNSHARED=
 fi
 
+# HOSTRUNNER - Program to run CPython for the host platform
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking HOSTRUNNER" >&5
 printf %s "checking HOSTRUNNER... " >&6; }
-
 if test -z "$HOSTRUNNER"
 then
     case $ac_sys_system in #(
diff --git a/configure.ac b/configure.ac
index 8fa6cb60900ad1..4cfced10432491 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1609,8 +1609,8 @@ if test "$cross_compiling" = yes; then
     RUNSHARED=
 fi
 
+# HOSTRUNNER - Program to run CPython for the host platform
 AC_MSG_CHECKING([HOSTRUNNER])
-AC_ARG_VAR([HOSTRUNNER], [Program to run CPython for the host platform])
 if test -z "$HOSTRUNNER"
 then
     AS_CASE([$ac_sys_system],
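Looping back to the _testinternalcapi.get_tracked_heap_size() helper added earlier in this patch, a minimal sketch of the kind of check the updated test_gc performs (illustrative only; _testinternalcapi is a private test module that is not present in every build):

    import gc

    try:
        import _testinternalcapi
    except ImportError:
        _testinternalcapi = None

    if _testinternalcapi is not None:
        gc.collect()
        before = _testinternalcapi.get_tracked_heap_size()
        cycles = [[i] for i in range(10_000)]   # GC-tracked objects
        cycles.append(cycles)                   # make the structure cyclic
        del cycles
        gc.collect()
        after = _testinternalcapi.get_tracked_heap_size()
        # After collection the tracked heap should be back near its starting size.
        print("tracked heap delta:", after - before)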