diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 7dbbe71b2131e7..4ef543d7369734 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -104,7 +104,7 @@ jobs: # The `find` line is required as a result of https://github.com/actions/runner-images/issues/9966. # This is a bug in the macOS runner image where the pre-installed Python is installed in the same - # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes + # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes # the symlink to the pre-installed Python so that the Homebrew Python is used instead. - name: Native macOS if: runner.os == 'macOS' diff --git a/Doc/Makefile b/Doc/Makefile index 22e43ee3e542ee..4a704ad58b33d3 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -144,7 +144,7 @@ pydoc-topics: build .PHONY: gettext gettext: BUILDER = gettext -gettext: SPHINXOPTS += -d build/doctrees-gettext +gettext: override SPHINXOPTS := -d build/doctrees-gettext $(SPHINXOPTS) gettext: build .PHONY: htmlview diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index 86d3967d9fb577..444b3456f051d8 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -529,19 +529,19 @@ The following functions and structs are used to create The following “offset” fields cannot be set using :c:type:`PyType_Slot`: - * :c:member:`~PyTypeObject.tp_weaklistoffset` - (use :c:macro:`Py_TPFLAGS_MANAGED_WEAKREF` instead if possible) - * :c:member:`~PyTypeObject.tp_dictoffset` - (use :c:macro:`Py_TPFLAGS_MANAGED_DICT` instead if possible) - * :c:member:`~PyTypeObject.tp_vectorcall_offset` - (use ``"__vectorcalloffset__"`` in - :ref:`PyMemberDef `) - - If it is not possible to switch to a ``MANAGED`` flag (for example, - for vectorcall or to support Python older than 3.12), specify the - offset in :c:member:`Py_tp_members `. - See :ref:`PyMemberDef documentation ` - for details. + * :c:member:`~PyTypeObject.tp_weaklistoffset` + (use :c:macro:`Py_TPFLAGS_MANAGED_WEAKREF` instead if possible) + * :c:member:`~PyTypeObject.tp_dictoffset` + (use :c:macro:`Py_TPFLAGS_MANAGED_DICT` instead if possible) + * :c:member:`~PyTypeObject.tp_vectorcall_offset` + (use ``"__vectorcalloffset__"`` in + :ref:`PyMemberDef `) + + If it is not possible to switch to a ``MANAGED`` flag (for example, + for vectorcall or to support Python older than 3.12), specify the + offset in :c:member:`Py_tp_members `. + See :ref:`PyMemberDef documentation ` + for details. The following internal fields cannot be set at all when creating a heap type: @@ -557,20 +557,18 @@ The following functions and structs are used to create To avoid issues, use the *bases* argument of :c:func:`PyType_FromSpecWithBases` instead. - .. versionchanged:: 3.9 - - Slots in :c:type:`PyBufferProcs` may be set in the unlimited API. + .. versionchanged:: 3.9 + Slots in :c:type:`PyBufferProcs` may be set in the unlimited API. - .. versionchanged:: 3.11 - :c:member:`~PyBufferProcs.bf_getbuffer` and - :c:member:`~PyBufferProcs.bf_releasebuffer` are now available - under the :ref:`limited API `. + .. versionchanged:: 3.11 + :c:member:`~PyBufferProcs.bf_getbuffer` and + :c:member:`~PyBufferProcs.bf_releasebuffer` are now available + under the :ref:`limited API `. - .. versionchanged:: 3.14 - - The field :c:member:`~PyTypeObject.tp_vectorcall` can now set - using ``Py_tp_vectorcall``. See the field's documentation - for details. + .. 
versionchanged:: 3.14 + The field :c:member:`~PyTypeObject.tp_vectorcall` can now set + using ``Py_tp_vectorcall``. See the field's documentation + for details. .. c:member:: void *pfunc diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index d49fd7540a5162..7d8cc0adbc2df0 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -180,7 +180,7 @@ PyCapsule_IsValid:const char*:name:: PyCapsule_New:PyObject*::+1: PyCapsule_New:void*:pointer:: PyCapsule_New:const char *:name:: -PyCapsule_New::void (* destructor)(PyObject* ):: +PyCapsule_New:void (*)(PyObject *):destructor:: PyCapsule_SetContext:int::: PyCapsule_SetContext:PyObject*:self:0: @@ -349,11 +349,11 @@ PyComplex_CheckExact:int::: PyComplex_CheckExact:PyObject*:p:0: PyComplex_FromCComplex:PyObject*::+1: -PyComplex_FromCComplex::Py_complex v:: +PyComplex_FromCComplex:Py_complex:v:: PyComplex_FromDoubles:PyObject*::+1: -PyComplex_FromDoubles::double real:: -PyComplex_FromDoubles::double imag:: +PyComplex_FromDoubles:double:real:: +PyComplex_FromDoubles:double:imag:: PyComplex_ImagAsDouble:double::: PyComplex_ImagAsDouble:PyObject*:op:0: @@ -620,7 +620,9 @@ PyErr_GetExcInfo:PyObject**:pvalue:+1: PyErr_GetExcInfo:PyObject**:ptraceback:+1: PyErr_GetRaisedException:PyObject*::+1: -PyErr_SetRaisedException:::: + +PyErr_SetRaisedException:void::: +PyErr_SetRaisedException:PyObject *:exc:0:stolen PyErr_GivenExceptionMatches:int::: PyErr_GivenExceptionMatches:PyObject*:given:0: @@ -640,9 +642,9 @@ PyErr_NewExceptionWithDoc:PyObject*:dict:0: PyErr_NoMemory:PyObject*::null: PyErr_NormalizeException:void::: -PyErr_NormalizeException:PyObject**:exc::??? -PyErr_NormalizeException:PyObject**:val::??? -PyErr_NormalizeException:PyObject**:tb::??? +PyErr_NormalizeException:PyObject**:exc:+1:??? +PyErr_NormalizeException:PyObject**:val:+1:??? +PyErr_NormalizeException:PyObject**:tb:+1:??? 
PyErr_Occurred:PyObject*::0: @@ -1314,7 +1316,7 @@ PyMapping_GetItemString:const char*:key:: PyMapping_HasKey:int::: PyMapping_HasKey:PyObject*:o:0: -PyMapping_HasKey:PyObject*:key:: +PyMapping_HasKey:PyObject*:key:0: PyMapping_HasKeyString:int::: PyMapping_HasKeyString:PyObject*:o:0: @@ -1474,7 +1476,7 @@ PyModule_GetState:void*::: PyModule_GetState:PyObject*:module:0: PyModule_New:PyObject*::+1: -PyModule_New::char* name:: +PyModule_New:char*:name:: PyModule_NewObject:PyObject*::+1: PyModule_NewObject:PyObject*:name:+1: @@ -1984,10 +1986,10 @@ PyRun_StringFlags:PyObject*:locals:0: PyRun_StringFlags:PyCompilerFlags*:flags:: PySeqIter_Check:int::: -PySeqIter_Check::op:: +PySeqIter_Check:PyObject *:op:0: PySeqIter_New:PyObject*::+1: -PySeqIter_New:PyObject*:seq:: +PySeqIter_New:PyObject*:seq:0: PySequence_Check:int::: PySequence_Check:PyObject*:o:0: @@ -2421,7 +2423,7 @@ PyUnicode_GET_LENGTH:PyObject*:o:0: PyUnicode_KIND:int::: PyUnicode_KIND:PyObject*:o:0: -PyUnicode_MAX_CHAR_VALUE:::: +PyUnicode_MAX_CHAR_VALUE:Py_UCS4::: PyUnicode_MAX_CHAR_VALUE:PyObject*:o:0: Py_UNICODE_ISALNUM:int::: @@ -2488,7 +2490,7 @@ PyUnicode_FromWideChar:const wchar_t*:w:: PyUnicode_FromWideChar:Py_ssize_t:size:: PyUnicode_AsWideChar:Py_ssize_t::: -PyUnicode_AsWideChar:PyObject*:*unicode:0: +PyUnicode_AsWideChar:PyObject*:unicode:0: PyUnicode_AsWideChar:wchar_t*:w:: PyUnicode_AsWideChar:Py_ssize_t:size:: @@ -2541,7 +2543,7 @@ PyUnicode_AsUTF8String:PyObject*:unicode:0: PyUnicode_AsUTF8AndSize:const char*::: PyUnicode_AsUTF8AndSize:PyObject*:unicode:0: -PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:0: +PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:: PyUnicode_AsUTF8:const char*::: PyUnicode_AsUTF8:PyObject*:unicode:0: @@ -2864,13 +2866,13 @@ PyUnicodeDecodeError_SetStart:PyObject*:exc:0: PyUnicodeDecodeError_SetStart:Py_ssize_t:start:: PyWeakref_Check:int::: -PyWeakref_Check:PyObject*:ob:: +PyWeakref_Check:PyObject*:ob:0: PyWeakref_CheckProxy:int::: -PyWeakref_CheckProxy:PyObject*:ob:: +PyWeakref_CheckProxy:PyObject*:ob:0: PyWeakref_CheckRef:int::: -PyWeakref_CheckRef:PyObject*:ob:: +PyWeakref_CheckRef:PyObject*:ob:0: PyWeakref_GET_OBJECT:PyObject*::0: PyWeakref_GET_OBJECT:PyObject*:ref:0: diff --git a/Doc/library/asyncio-sync.rst b/Doc/library/asyncio-sync.rst index 3cf8e2737e85dc..77c2e97da11990 100644 --- a/Doc/library/asyncio-sync.rst +++ b/Doc/library/asyncio-sync.rst @@ -259,8 +259,8 @@ Condition Note that a task *may* return from this call spuriously, which is why the caller should always re-check the state - and be prepared to :meth:`wait` again. For this reason, you may - prefer to use :meth:`wait_for` instead. + and be prepared to :meth:`~Condition.wait` again. For this reason, you may + prefer to use :meth:`~Condition.wait_for` instead. .. coroutinemethod:: wait_for(predicate) @@ -268,7 +268,7 @@ Condition The predicate must be a callable which result will be interpreted as a boolean value. The method will repeatedly - :meth:`wait` until the predicate evaluates to *true*. The final value is the + :meth:`~Condition.wait` until the predicate evaluates to *true*. The final value is the return value. @@ -434,7 +434,7 @@ Barrier .. coroutinemethod:: abort() Put the barrier into a broken state. This causes any active or future - calls to :meth:`wait` to fail with the :class:`BrokenBarrierError`. + calls to :meth:`~Barrier.wait` to fail with the :class:`BrokenBarrierError`. Use this for example if one of the tasks needs to abort, to avoid infinite waiting tasks. 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 869a47c1261293..f9322da3d4fbb0 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -576,6 +576,14 @@ sys from other interpreters than the one it's called in. +tkinter +------- + +* Make tkinter widget methods :meth:`!after` and :meth:`!after_idle` accept + arguments passed by keyword. + (Contributed by Zhikang Yan in :gh:`126899`.) + + unicodedata ----------- diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index f1ca54839fbc38..2ae48002d720e9 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -99,6 +99,8 @@ typedef struct _gc_stats { uint64_t collections; uint64_t object_visits; uint64_t objects_collected; + uint64_t objects_transitively_reachable; + uint64_t objects_not_transitively_reachable; } GCStats; typedef struct _uop_stats { diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 8c0100390d036e..b786c5f49e9831 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame { _PyStackRef *stackpointer; uint16_t return_offset; /* Only relevant during a function call */ char owner; + char visited; /* Locals and stack */ _PyStackRef localsplus[1]; } _PyInterpreterFrame; @@ -207,6 +208,7 @@ _PyFrame_Initialize( #endif frame->return_offset = 0; frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; for (int i = null_locals_from; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = PyStackRef_NULL; @@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int frame->instr_ptr = _PyCode_CODE(code); #endif frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; frame->return_offset = 0; #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 479fe10d00066d..4ff34bf8ead7d0 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -10,11 +10,11 @@ extern "C" { /* GC information is stored BEFORE the object structure. */ typedef struct { - // Pointer to next object in the list. + // Tagged pointer to next object in the list. // 0 means the object is not tracked uintptr_t _gc_next; - // Pointer to previous object in the list. + // Tagged pointer to previous object in the list. // Lowest two bits are used for flags documented later. uintptr_t _gc_prev; } PyGC_Head; @@ -284,6 +284,11 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +enum _GCPhase { + GC_PHASE_MARK = 0, + GC_PHASE_COLLECT = 1 +}; + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked * via their gc headers' gc_prev pointers. */ @@ -311,6 +316,7 @@ struct _gc_runtime_state { Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; + int phase; #ifdef Py_GIL_DISABLED /* This is the number of objects that survived the last full diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 2c666f9be4bd79..f03e484f5ef8b0 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -10,6 +10,9 @@ extern "C" { PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); +// _PyList_GetItemRef should be used only when the object is known as a list +// because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does. 
+extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 34d835a7f84ee7..c52ed8f14707ba 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -471,8 +471,8 @@ static inline void _PyObject_GC_TRACK( PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); - /* Young objects will be moved into the visited space during GC, so set the bit here */ - gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space; + uintptr_t not_visited = 1 ^ interp->gc.visited_space; + gc->_gc_next = ((uintptr_t)generation0) | not_visited; generation0->_gc_prev = (uintptr_t)gc; #endif } diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5ce172856e1b19..d63c8df8ca6690 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1952,7 +1952,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, - [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 9f6748945bab36..1260b957ce9482 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -137,6 +137,7 @@ extern PyTypeObject _PyExc_MemoryError; { .threshold = 0, }, \ }, \ .work_to_do = -5000, \ + .phase = GC_PHASE_MARK, \ }, \ .qsbr = { \ .wr_seq = QSBR_INITIAL, \ diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1c1f478c3833c8..1825bb3a5abc80 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -84,7 +84,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/InternalDocs/README.md b/InternalDocs/README.md index f6aa3db3b384af..8cdd06d189f362 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -1,4 +1,3 @@ - # CPython Internals Documentation The documentation in this folder is intended for CPython maintainers. diff --git a/InternalDocs/adaptive.md b/InternalDocs/adaptive.md index 4ae9e85b387f39..7cfa8e52310460 100644 --- a/InternalDocs/adaptive.md +++ b/InternalDocs/adaptive.md @@ -96,6 +96,7 @@ quality of specialization and keeping the overhead of specialization low. Specialized instructions must be fast. 
In order to be fast, specialized instructions should be tailored for a particular set of values that allows them to: + 1. Verify that incoming value is part of that set with low overhead. 2. Perform the operation quickly. @@ -107,9 +108,11 @@ For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` dictionaries that have a keys with the expected version. This can be tested quickly: + * `globals->keys->dk_version == expected_version` and the operation can be performed quickly: + * `value = entries[cache->index].me_value;`. Because it is impossible to measure the performance of an instruction without @@ -122,10 +125,11 @@ base instruction. ### Implementation of specialized instructions In general, specialized instructions should be implemented in two parts: + 1. A sequence of guards, each of the form - `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. + `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. 2. The operation, which should ideally have no branches and - a minimum number of dependent memory accesses. + a minimum number of dependent memory accesses. In practice, the parts may overlap, as data required for guards can be re-used in the operation. diff --git a/InternalDocs/changing_grammar.md b/InternalDocs/changing_grammar.md index 1a5eebdc1418dc..c6b895135a360d 100644 --- a/InternalDocs/changing_grammar.md +++ b/InternalDocs/changing_grammar.md @@ -32,7 +32,7 @@ Below is a checklist of things that may need to change. [`Include/internal/pycore_ast.h`](../Include/internal/pycore_ast.h) and [`Python/Python-ast.c`](../Python/Python-ast.c). -* [`Parser/lexer/`](../Parser/lexer/) contains the tokenization code. +* [`Parser/lexer/`](../Parser/lexer) contains the tokenization code. This is where you would add a new type of comment or string literal, for example. * [`Python/ast.c`](../Python/ast.c) will need changes to validate AST objects @@ -60,4 +60,4 @@ Below is a checklist of things that may need to change. to the tokenizer. * Documentation must be written! Specifically, one or more of the pages in - [`Doc/reference/`](../Doc/reference/) will need to be updated. + [`Doc/reference/`](../Doc/reference) will need to be updated. diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md index bee4a9d0a08915..d4e28c6b238b48 100644 --- a/InternalDocs/code_objects.md +++ b/InternalDocs/code_objects.md @@ -1,4 +1,3 @@ - # Code objects A `CodeObject` is a builtin Python type that represents a compiled executable, @@ -43,7 +42,7 @@ so a compact format is very important. Note that traceback objects don't store all this information -- they store the start line number, for backward compatibility, and the "last instruction" value. The rest can be computed from the last instruction (`tb_lasti`) with the help of the -locations table. For Python code, there is a convenience method +locations table. For Python code, there is a convenience method (`codeobject.co_positions`)[https://docs.python.org/dev/reference/datamodel.html#codeobject.co_positions] which returns an iterator of `({line}, {endline}, {column}, {endcolumn})` tuples, one per instruction. @@ -75,9 +74,11 @@ returned by the `co_positions()` iterator. > See [`Objects/lnotab_notes.txt`](../Objects/lnotab_notes.txt) for more details. `co_linetable` consists of a sequence of location entries. -Each entry starts with a byte with the most significant bit set, followed by zero or more bytes with the most significant bit unset. 
+Each entry starts with a byte with the most significant bit set, followed by +zero or more bytes with the most significant bit unset. Each entry contains the following information: + * The number of code units covered by this entry (length) * The start line * The end line @@ -86,54 +87,88 @@ Each entry contains the following information: The first byte has the following format: -Bit 7 | Bits 3-6 | Bits 0-2 - ---- | ---- | ---- - 1 | Code | Length (in code units) - 1 +| Bit 7 | Bits 3-6 | Bits 0-2 | +|-------|----------|----------------------------| +| 1 | Code | Length (in code units) - 1 | The codes are enumerated in the `_PyCodeLocationInfoKind` enum. -## Variable-length integer encodings +### Variable-length integer encodings -Integers are often encoded using a variable-length integer encoding +Integers are often encoded using a variable length integer encoding -### Unsigned integers (`varint`) +#### Unsigned integers (`varint`) Unsigned integers are encoded in 6-bit chunks, least significant first. Each chunk but the last has bit 6 set. For example: * 63 is encoded as `0x3f` -* 200 is encoded as `0x48`, `0x03` +* 200 is encoded as `0x48`, `0x03` since ``200 = (0x03 << 6) | 0x48``. + +The following helper can be used to convert an integer into a `varint`: + +```py +def encode_varint(s): + ret = [] + while s >= 64: + ret.append(((s & 0x3F) | 0x40) & 0x3F) + s >>= 6 + ret.append(s & 0x3F) + return bytes(ret) +``` + +To convert a `varint` into an unsigned integer: + +```py +def decode_varint(chunks): + ret = 0 + for chunk in reversed(chunks): + ret = (ret << 6) | chunk + return ret +``` -### Signed integers (`svarint`) +#### Signed integers (`svarint`) Signed integers are encoded by converting them to unsigned integers, using the following function: -```Python -def convert(s): + +```py +def svarint_to_varint(s): if s < 0: - return ((-s)<<1) | 1 + return ((-s) << 1) | 1 else: - return (s<<1) + return s << 1 +``` + +To convert a `varint` into a signed integer: + +```py +def varint_to_svarint(uval): + return -(uval >> 1) if uval & 1 else (uval >> 1) ``` -*Location entries* +### Location entries The meaning of the codes and the following bytes are as follows: -Code | Meaning | Start line | End line | Start column | End column - ---- | ---- | ---- | ---- | ---- | ---- - 0-9 | Short form | Δ 0 | Δ 0 | See below | See below - 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte - 13 | No column info | Δ svarint | Δ 0 | None | None - 14 | Long form | Δ svarint | Δ varint | varint | varint - 15 | No location | None | None | None | None +| Code | Meaning | Start line | End line | Start column | End column | +|-------|----------------|---------------|----------|---------------|---------------| +| 0-9 | Short form | Δ 0 | Δ 0 | See below | See below | +| 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte | +| 13 | No column info | Δ svarint | Δ 0 | None | None | +| 14 | Long form | Δ svarint | Δ varint | varint | varint | +| 15 | No location | None | None | None | None | The Δ means the value is encoded as a delta from another value: + * Start line: Delta from the previous start line, or `co_firstlineno` for the first entry. -* End line: Delta from the start line +* End line: Delta from the start line. + +### The short forms -*The short forms* +Codes 0-9 are the short forms. The short form consists of two bytes, +the second byte holding additional column information. The code is the +start column divided by 8 (and rounded down). 
-Codes 0-9 are the short forms. The short form consists of two bytes, the second byte holding additional column information. The code is the start column divided by 8 (and rounded down). * Start column: `(code*8) + ((second_byte>>4)&7)` * End column: `start_column + (second_byte&15)` diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index ed4cfb23ca51f7..9e99f348acbd8f 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -1,4 +1,3 @@ - Compiler design =============== @@ -7,8 +6,8 @@ Abstract In CPython, the compilation from source code to bytecode involves several steps: -1. Tokenize the source code [Parser/lexer/](../Parser/lexer/) - and [Parser/tokenizer/](../Parser/tokenizer/). +1. Tokenize the source code [Parser/lexer/](../Parser/lexer) + and [Parser/tokenizer/](../Parser/tokenizer). 2. Parse the stream of tokens into an Abstract Syntax Tree [Parser/parser.c](../Parser/parser.c). 3. Transform AST into an instruction sequence @@ -134,9 +133,8 @@ this case) a `stmt_ty` struct with the appropriate initialization. The `FunctionDef()` constructor function sets 'kind' to `FunctionDef_kind` and initializes the *name*, *args*, *body*, and *attributes* fields. -See also -[Green Tree Snakes - The missing Python AST docs](https://greentreesnakes.readthedocs.io/en/latest) - by Thomas Kluyver. +See also [Green Tree Snakes - The missing Python AST docs]( +https://greentreesnakes.readthedocs.io/en/latest) by Thomas Kluyver. Memory management ================= @@ -260,12 +258,12 @@ manually -- `generic`, `identifier` and `int`. These types are found in [Include/internal/pycore_asdl.h](../Include/internal/pycore_asdl.h). Functions and macros for creating `asdl_xx_seq *` types are as follows: -`_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)` - Allocate memory for an `asdl_generic_seq` of the specified length -`_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)` - Allocate memory for an `asdl_identifier_seq` of the specified length -`_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)` - Allocate memory for an `asdl_int_seq` of the specified length +* `_Py_asdl_generic_seq_new(Py_ssize_t, PyArena *)`: + Allocate memory for an `asdl_generic_seq` of the specified length +* `_Py_asdl_identifier_seq_new(Py_ssize_t, PyArena *)`: + Allocate memory for an `asdl_identifier_seq` of the specified length +* `_Py_asdl_int_seq_new(Py_ssize_t, PyArena *)`: + Allocate memory for an `asdl_int_seq` of the specified length In addition to the three types mentioned above, some ASDL sequence types are automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py) and found in @@ -273,20 +271,20 @@ automatically generated by [Parser/asdl_c.py](../Parser/asdl_c.py) and found in Macros for using both manually defined and automatically generated ASDL sequence types are as follows: -`asdl_seq_GET(asdl_xx_seq *, int)` - Get item held at a specific position in an `asdl_xx_seq` -`asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)` - Set a specific index in an `asdl_xx_seq` to the specified value +* `asdl_seq_GET(asdl_xx_seq *, int)`: + Get item held at a specific position in an `asdl_xx_seq` +* `asdl_seq_SET(asdl_xx_seq *, int, stmt_ty)`: + Set a specific index in an `asdl_xx_seq` to the specified value -Untyped counterparts exist for some of the typed macros. These are useful +Untyped counterparts exist for some of the typed macros. 
These are useful when a function needs to manipulate a generic ASDL sequence: -`asdl_seq_GET_UNTYPED(asdl_seq *, int)` - Get item held at a specific position in an `asdl_seq` -`asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)` - Set a specific index in an `asdl_seq` to the specified value -`asdl_seq_LEN(asdl_seq *)` - Return the length of an `asdl_seq` or `asdl_xx_seq` +* `asdl_seq_GET_UNTYPED(asdl_seq *, int)`: + Get item held at a specific position in an `asdl_seq` +* `asdl_seq_SET_UNTYPED(asdl_seq *, int, stmt_ty)`: + Set a specific index in an `asdl_seq` to the specified value +* `asdl_seq_LEN(asdl_seq *)`: + Return the length of an `asdl_seq` or `asdl_xx_seq` Note that typed macros and functions are recommended over their untyped counterparts. Typed macros carry out checks in debug mode and aid @@ -379,33 +377,33 @@ arguments to a node that used the '*' modifier). Emission of bytecode is handled by the following macros: -* `ADDOP(struct compiler *, location, int)` - add a specified opcode -* `ADDOP_IN_SCOPE(struct compiler *, location, int)` - like `ADDOP`, but also exits current scope; used for adding return value - opcodes in lambdas and closures -* `ADDOP_I(struct compiler *, location, int, Py_ssize_t)` - add an opcode that takes an integer argument -* `ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)` - add an opcode with the proper argument based on the position of the - specified PyObject in PyObject sequence object, but with no handling of - mangled names; used for when you - need to do named lookups of objects such as globals, consts, or - parameters where name mangling is not possible and the scope of the - name is known; *TYPE* is the name of PyObject sequence - (`names` or `varnames`) -* `ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)` - just like `ADDOP_O`, but steals a reference to PyObject -* `ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)` - just like `ADDOP_O`, but name mangling is also handled; used for - attribute loading or importing based on name -* `ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)` - add the `LOAD_CONST` opcode with the proper argument based on the - position of the specified PyObject in the consts table. 
-* `ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)` - just like `ADDOP_LOAD_CONST_NEW`, but steals a reference to PyObject -* `ADDOP_JUMP(struct compiler *, location, int, basicblock *)` - create a jump to a basic block +* `ADDOP(struct compiler *, location, int)`: + add a specified opcode +* `ADDOP_IN_SCOPE(struct compiler *, location, int)`: + like `ADDOP`, but also exits current scope; used for adding return value + opcodes in lambdas and closures +* `ADDOP_I(struct compiler *, location, int, Py_ssize_t)`: + add an opcode that takes an integer argument +* `ADDOP_O(struct compiler *, location, int, PyObject *, TYPE)`: + add an opcode with the proper argument based on the position of the + specified PyObject in PyObject sequence object, but with no handling of + mangled names; used for when you + need to do named lookups of objects such as globals, consts, or + parameters where name mangling is not possible and the scope of the + name is known; *TYPE* is the name of PyObject sequence + (`names` or `varnames`) +* `ADDOP_N(struct compiler *, location, int, PyObject *, TYPE)`: + just like `ADDOP_O`, but steals a reference to PyObject +* `ADDOP_NAME(struct compiler *, location, int, PyObject *, TYPE)`: + just like `ADDOP_O`, but name mangling is also handled; used for + attribute loading or importing based on name +* `ADDOP_LOAD_CONST(struct compiler *, location, PyObject *)`: + add the `LOAD_CONST` opcode with the proper argument based on the + position of the specified PyObject in the consts table. +* `ADDOP_LOAD_CONST_NEW(struct compiler *, location, PyObject *)`: + just like `ADDOP_LOAD_CONST_NEW`, but steals a reference to PyObject +* `ADDOP_JUMP(struct compiler *, location, int, basicblock *)`: + create a jump to a basic block The `location` argument is a struct with the source location to be associated with this instruction. It is typically extracted from an @@ -433,7 +431,7 @@ Finally, the sequence of pseudo-instructions is converted into actual bytecode. This includes transforming pseudo instructions into actual instructions, converting jump targets from logical labels to relative offsets, and construction of the [exception table](exception_handling.md) and -[locations table](locations.md). +[locations table](code_objects.md#source-code-locations). The bytecode and tables are then wrapped into a `PyCodeObject` along with additional metadata, including the `consts` and `names` arrays, information about function reference to the source code (filename, etc). All of this is implemented by @@ -453,7 +451,7 @@ in [Python/ceval.c](../Python/ceval.c). Important files =============== -* [Parser/](../Parser/) +* [Parser/](../Parser) * [Parser/Python.asdl](../Parser/Python.asdl): ASDL syntax file. @@ -534,7 +532,7 @@ Important files * [Python/instruction_sequence.c](../Python/instruction_sequence.c): A data structure representing a sequence of bytecode-like pseudo-instructions. -* [Include/](../Include/) +* [Include/](../Include) * [Include/cpython/code.h](../Include/cpython/code.h) : Header file for [Objects/codeobject.c](../Objects/codeobject.c); @@ -556,7 +554,7 @@ Important files : Declares `_PyAST_Validate()` external (from [Python/ast.c](../Python/ast.c)). * [Include/internal/pycore_symtable.h](../Include/internal/pycore_symtable.h) - : Header for [Python/symtable.c](../Python/symtable.c). + : Header for [Python/symtable.c](../Python/symtable.c). `struct symtable` and `PySTEntryObject` are defined here. 
* [Include/internal/pycore_parser.h](../Include/internal/pycore_parser.h) @@ -570,7 +568,7 @@ Important files by [Tools/cases_generator/opcode_id_generator.py](../Tools/cases_generator/opcode_id_generator.py). -* [Objects/](../Objects/) +* [Objects/](../Objects) * [Objects/codeobject.c](../Objects/codeobject.c) : Contains PyCodeObject-related code. @@ -579,7 +577,7 @@ Important files : Contains the `frame_setlineno()` function which should determine whether it is allowed to make a jump between two points in a bytecode. -* [Lib/](../Lib/) +* [Lib/](../Lib) * [Lib/opcode.py](../Lib/opcode.py) : opcode utilities exposed to Python. @@ -591,7 +589,7 @@ Important files Objects ======= -* [Locations](locations.md): Describes the location table +* [Locations](code_objects.md#source-code-locations): Describes the location table * [Frames](frames.md): Describes frames and the frame stack * [Objects/object_layout.md](../Objects/object_layout.md): Describes object layout for 3.11 and later * [Exception Handling](exception_handling.md): Describes the exception table diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 14066a5864b4da..28589787e1fad7 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -87,10 +87,10 @@ offset of the raising instruction should be pushed to the stack. Handling an exception, once an exception table entry is found, consists of the following steps: - 1. pop values from the stack until it matches the stack depth for the handler. - 2. if `lasti` is true, then push the offset that the exception was raised at. - 3. push the exception to the stack. - 4. jump to the target offset and resume execution. +1. pop values from the stack until it matches the stack depth for the handler. +2. if `lasti` is true, then push the offset that the exception was raised at. +3. push the exception to the stack. +4. jump to the target offset and resume execution. Reraising Exceptions and `lasti` @@ -107,13 +107,12 @@ Format of the exception table ----------------------------- Conceptually, the exception table consists of a sequence of 5-tuples: -``` - 1. `start-offset` (inclusive) - 2. `end-offset` (exclusive) - 3. `target` - 4. `stack-depth` - 5. `push-lasti` (boolean) -``` + +1. `start-offset` (inclusive) +2. `end-offset` (exclusive) +3. `target` +4. `stack-depth` +5. `push-lasti` (boolean) All offsets and lengths are in code units, not bytes. @@ -123,18 +122,19 @@ For it to be searchable quickly, we need to support binary search giving us log( Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: - `start, size, target, depth, push-lasti`. +`start, size, target, depth, push-lasti`. Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each code unit takes 2 bytes. It also happens that depth is generally quite small. So, we need to encode: + ``` - `start` (up to 30 bits) - `size` (up to 30 bits) - `target` (up to 30 bits) - `depth` (up to ~8 bits) - `lasti` (1 bit) +start (up to 30 bits) +size (up to 30 bits) +target (up to 30 bits) +depth (up to ~8 bits) +lasti (1 bit) ``` We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. @@ -145,29 +145,32 @@ The 8 bits of a byte are (msb left) SXdddddd where S is the start bit. 
X is the In addition, we combine `depth` and `lasti` into a single value, `((depth<<1)+lasti)`, before encoding. For example, the exception entry: + ``` - `start`: 20 - `end`: 28 - `target`: 100 - `depth`: 3 - `lasti`: False +start: 20 +end: 28 +target: 100 +depth: 3 +lasti: False ``` is encoded by first converting to the more compact four value form: + ``` - `start`: 20 - `size`: 8 - `target`: 100 - `depth<<1+lasti`: 6 +start: 20 +size: 8 +target: 100 +depth<<1+lasti: 6 ``` which is then encoded as: + ``` - 148 (MSB + 20 for start) - 8 (size) - 65 (Extend bit + 1) - 36 (Remainder of target, 100 == (1<<6)+36) - 6 +148 (MSB + 20 for start) +8 (size) +65 (Extend bit + 1) +36 (Remainder of target, 100 == (1<<6)+36) +6 ``` for a total of five bytes. diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md index 06dc8f0702c3d9..2598873ca98479 100644 --- a/InternalDocs/frames.md +++ b/InternalDocs/frames.md @@ -27,6 +27,7 @@ objects, so are not allocated in the per-thread stack. See `PyGenObject` in ## Layout Each activation record is laid out as: + * Specials * Locals * Stack diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 272a0834cbfe24..08db080a200ea4 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -1,4 +1,3 @@ - Garbage collector design ======================== @@ -117,7 +116,7 @@ general, the collection of all objects tracked by GC is partitioned into disjoin doubly linked list. Between collections, objects are partitioned into "generations", reflecting how often they've survived collection attempts. During collections, the generation(s) being collected are further partitioned into, for example, sets of reachable and unreachable objects. Doubly linked lists -support moving an object from one partition to another, adding a new object, removing an object +support moving an object from one partition to another, adding a new object, removing an object entirely (objects tracked by GC are most often reclaimed by the refcounting system when GC isn't running at all!), and merging partitions, all with a small constant number of pointer updates. With care, they also support iterating over a partition while objects are being added to - and @@ -478,6 +477,45 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. ``` +Optimization: visiting reachable objects +======================================== + +An object cannot be garbage if it can be reached. + +To avoid having to identify reference cycles across the whole heap, we can +reduce the amount of work done considerably by first moving most reachable objects +to the `visited` space. Empirically, most reachable objects can be reached from a +small set of global objects and local variables. +This step does much less work per object, so reduces the time spent +performing garbage collection by at least half. + +> [!NOTE] +> Objects that are not determined to be reachable by this pass are not necessarily +> unreachable. We still need to perform the main algorithm to determine which objects +> are actually unreachable. +We use the same technique of forming a transitive closure as the incremental +collector does to find reachable objects, seeding the list with some global +objects and the currently executing frames. + +This phase moves objects to the `visited` space, as follows: + +1. 
All objects directly referred to by any builtin class, the `sys` module, the `builtins` +module and all objects directly referred to from stack frames are added to a working +set of reachable objects. +2. Until this working set is empty: + 1. Pop an object from the set and move it to the `visited` space + 2. For each object directly reachable from that object: + * If it is not already in `visited` space and it is a GC object, + add it to the working set + + +Before each increment of collection is performed, the stacks are scanned +to check for any new stack frames that have been created since the last +increment. All objects directly referred to from those stack frames are +added to the working set. +Then the above algorithm is repeated, starting from step 2. + + Optimization: reusing fields to save memory =========================================== diff --git a/InternalDocs/generators.md b/InternalDocs/generators.md index d53f0f9bdff4e4..afa8b8f4bb8040 100644 --- a/InternalDocs/generators.md +++ b/InternalDocs/generators.md @@ -1,4 +1,3 @@ - Generators ========== diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md index 4c10cbbed37735..ab149e43471072 100644 --- a/InternalDocs/interpreter.md +++ b/InternalDocs/interpreter.md @@ -1,4 +1,3 @@ - The bytecode interpreter ======================== diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md index 348988b7c2f003..445b866fc0cb96 100644 --- a/InternalDocs/parser.md +++ b/InternalDocs/parser.md @@ -1,4 +1,3 @@ - Guide to the parser =================== @@ -444,15 +443,15 @@ How to regenerate the parser Once you have made the changes to the grammar files, to regenerate the `C` parser (the one used by the interpreter) just execute: -``` - make regen-pegen +```shell +$ make regen-pegen ``` using the `Makefile` in the main directory. If you are on Windows you can use the Visual Studio project files to regenerate the parser or to execute: -``` - ./PCbuild/build.bat --regen +```dos +PCbuild/build.bat --regen ``` The generated parser file is located at [`Parser/parser.c`](../Parser/parser.c). @@ -468,15 +467,15 @@ any modifications to this file (in order to implement new Pegen features) you wi need to regenerate the meta-parser (the parser that parses the grammar files). To do so just execute: -``` - make regen-pegen-metaparser +```shell +$ make regen-pegen-metaparser ``` If you are on Windows you can use the Visual Studio project files to regenerate the parser or to execute: -``` - ./PCbuild/build.bat --regen +```dos +PCbuild/build.bat --regen ``` @@ -516,15 +515,15 @@ be found in the [`Grammar/Tokens`](../Grammar/Tokens) file. If you change this file to add new tokens, make sure to regenerate the files by executing: -``` - make regen-token +```shell +$ make regen-token ``` If you are on Windows you can use the Visual Studio project files to regenerate the tokens or to execute: -``` - ./PCbuild/build.bat --regen +```dos +PCbuild/build.bat --regen ``` How tokens are generated and the rules governing this are completely up to the tokenizer @@ -546,8 +545,8 @@ by default** except for rules with the special marker `memo` after the rule name (and type, if present): ``` - rule_name[typr] (memo): - ... +rule_name[typr] (memo): + ... ``` By selectively turning on memoization for a handful of rules, the parser becomes @@ -593,25 +592,25 @@ are always reserved words, even in positions where they make no sense meaning in context. 
Trying to use a hard keyword as a variable will always fail: -``` - >>> class = 3 - File "", line 1 - class = 3 - ^ - SyntaxError: invalid syntax - >>> foo(class=3) - File "", line 1 - foo(class=3) - ^^^^^ - SyntaxError: invalid syntax +```pycon +>>> class = 3 +File "", line 1 + class = 3 + ^ +SyntaxError: invalid syntax +>>> foo(class=3) +File "", line 1 + foo(class=3) + ^^^^^ +SyntaxError: invalid syntax ``` While soft keywords don't have this limitation if used in a context other the one where they are defined as keywords: -``` - >>> match = 45 - >>> foo(match="Yeah!") +```pycon +>>> match = 45 +>>> foo(match="Yeah!") ``` The `match` and `case` keywords are soft keywords, so that they are @@ -621,21 +620,21 @@ argument names. You can get a list of all keywords defined in the grammar from Python: -``` - >>> import keyword - >>> keyword.kwlist - ['False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break', - 'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', - 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', - 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield'] +```pycon +>>> import keyword +>>> keyword.kwlist +['False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await', 'break', +'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', +'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', +'pass', 'raise', 'return', 'try', 'while', 'with', 'yield'] ``` as well as soft keywords: -``` - >>> import keyword - >>> keyword.softkwlist - ['_', 'case', 'match'] +```pycon +>>> import keyword +>>> keyword.softkwlist +['_', 'case', 'match'] ``` > [!CAUTION] @@ -736,7 +735,7 @@ displayed when the error is reported. > rule or not. For example: ``` - $ 42 + $ 42 ``` should trigger the syntax error in the `$` character. If your rule is not correctly defined this @@ -744,7 +743,7 @@ won't happen. As another example, suppose that you try to define a rule to match `print` statements in order to create a better error message and you define it as: ``` - invalid_print: "print" expression +invalid_print: "print" expression ``` This will **seem** to work because the parser will correctly parse `print(something)` because it is valid @@ -756,7 +755,7 @@ will be reported there instead of the `$` character. Generating AST objects ---------------------- -The output of the C parser used by CPython, which is generated from the +The output of the C parser used by CPython, which is generated from the [grammar file](../Grammar/python.gram), is a Python AST object (using C structures). This means that the actions in the grammar file generate AST objects when they succeed. Constructing these objects can be quite cumbersome @@ -798,7 +797,7 @@ Check the contents of these files to know which is the best place for new tests, depending on the nature of the new feature you are adding. Tests for the parser generator itself can be found in the -[test_peg_generator](../Lib/test_peg_generator) directory. +[test_peg_generator](../Lib/test/test_peg_generator) directory. Debugging generated parsers @@ -816,15 +815,15 @@ For this reason it is a good idea to experiment first by generating a Python parser. 
To do this, you can go to the [Tools/peg_generator](../Tools/peg_generator) directory on the CPython repository and manually call the parser generator by executing: -``` - $ python -m pegen python +```shell +$ python -m pegen python ``` This will generate a file called `parse.py` in the same directory that you can use to parse some input: -``` - $ python parse.py file_with_source_code_to_test.py +```shell +$ python parse.py file_with_source_code_to_test.py ``` As the generated `parse.py` file is just Python code, you can modify it @@ -848,8 +847,8 @@ can be a bit hard to understand at first. To activate verbose mode you can add the `-d` flag when executing Python: -``` - $ python -d file_to_test.py +```shell +$ python -d file_to_test.py ``` This will print **a lot** of output to `stderr` so it is probably better to dump @@ -857,7 +856,7 @@ it to a file for further analysis. The output consists of trace lines with the following structure:: ``` - ('>'|'-'|'+'|'!') []: ... + ('>'|'-'|'+'|'!') []: ... ``` Every line is indented by a different amount (``) depending on how diff --git a/InternalDocs/string_interning.md b/InternalDocs/string_interning.md index e0d20632516142..26a5197c6e70f3 100644 --- a/InternalDocs/string_interning.md +++ b/InternalDocs/string_interning.md @@ -2,6 +2,7 @@ *Interned* strings are conceptually part of an interpreter-global *set* of interned strings, meaning that: + - no two interned strings have the same content (across an interpreter); - two interned strings can be safely compared using pointer equality (Python `is`). @@ -61,6 +62,7 @@ if it's interned and mortal it needs extra processing in The converse is not true: interned strings can be mortal. For mortal interned strings: + - the 2 references from the interned dict (key & value) are excluded from their refcount - the deallocator (`unicode_dealloc`) removes the string from the interned dict @@ -90,6 +92,7 @@ modify in place. The functions take ownership of (“steal”) the reference to their argument, and update the argument with a *new* reference. This means: + - They're “reference neutral”. - They must not be called with a borrowed reference. diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 996887cb713942..710aba9685efda 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -963,7 +963,7 @@ def answer_challenge(connection, authkey: bytes): f'Protocol error, expected challenge: {message=}') message = message[len(_CHALLENGE):] if len(message) < _MD5ONLY_MESSAGE_LENGTH: - raise AuthenticationError('challenge too short: {len(message)} bytes') + raise AuthenticationError(f'challenge too short: {len(message)} bytes') digest = _create_response(authkey, message) connection.send_bytes(digest) response = connection.recv_bytes(256) # reject large message diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2c243d470d4eda..2b314b6c9a16bf 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -438,14 +438,6 @@ def stat(self, *, follow_symlinks=True): """ raise UnsupportedOperation(self._unsupported_msg('stat()')) - def lstat(self): - """ - Like stat(), except if the path points to a symlink, the symlink's - status information is returned, rather than its target's. - """ - return self.stat(follow_symlinks=False) - - # Convenience functions for querying the stat results def exists(self, *, follow_symlinks=True): @@ -505,7 +497,7 @@ def is_symlink(self): Whether this path is a symbolic link. 
""" try: - return S_ISLNK(self.lstat().st_mode) + return S_ISLNK(self.stat(follow_symlinks=False).st_mode) except (OSError, ValueError): return False @@ -743,27 +735,12 @@ def absolute(self): # Treat the root directory as the current working directory. return self.with_segments('/', *self._raw_paths) - @classmethod - def cwd(cls): - """Return a new path pointing to the current working directory.""" - # We call 'absolute()' rather than using 'os.getcwd()' directly to - # enable users to replace the implementation of 'absolute()' in a - # subclass and benefit from the new behaviour here. This works because - # os.path.abspath('.') == os.getcwd(). - return cls().absolute() - def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ raise UnsupportedOperation(self._unsupported_msg('expanduser()')) - @classmethod - def home(cls): - """Return a new path pointing to expanduser('~'). - """ - return cls("~").expanduser() - def readlink(self): """ Return the path to which the symbolic link points. @@ -789,7 +766,7 @@ def raise_error(*args): def lstat(path_str): path = self.with_segments(path_str) path._resolving = True - return path.lstat() + return path.stat(follow_symlinks=False) def readlink(path_str): path = self.with_segments(path_str) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index b27f456d375225..b5d9dc49f58463 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -542,6 +542,13 @@ def stat(self, *, follow_symlinks=True): """ return os.stat(self, follow_symlinks=follow_symlinks) + def lstat(self): + """ + Like stat(), except if the path points to a symlink, the symlink's + status information is returned, rather than its target's. + """ + return os.lstat(self) + def exists(self, *, follow_symlinks=True): """ Whether this path exists. @@ -719,6 +726,14 @@ def absolute(self): tail.extend(self._tail) return self._from_parsed_parts(drive, root, tail) + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory.""" + cwd = os.getcwd() + path = cls(cwd) + path._str = cwd # getcwd() returns a normalized path + return path + def resolve(self, strict=False): """ Make the path absolute, resolving all symlinks on the way and also @@ -900,6 +915,15 @@ def expanduser(self): return self + @classmethod + def home(cls): + """Return a new path pointing to expanduser('~'). 
+ """ + homedir = os.path.expanduser("~") + if homedir == "~": + raise RuntimeError("Could not determine home directory.") + return cls(homedir) + @classmethod def from_uri(cls, uri): """Return a new path from the given 'file' URI.""" diff --git a/Lib/pdb.py b/Lib/pdb.py index b7f6fd4323407e..10d1923cdad2d6 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -438,6 +438,13 @@ def user_line(self, frame): if (self.mainpyfile != self.canonic(frame.f_code.co_filename)): return self._wait_for_mainpyfile = False + if self.trace_opcodes: + # GH-127321 + # We want to avoid stopping at an opcode that does not have + # an associated line number because pdb does not like it + if frame.f_lineno is None: + self.set_stepinstr() + return self.bp_commands(frame) self.interaction(frame, None) diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index ee52700b51fd07..ad86609016e478 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -318,14 +318,22 @@ def get_default_scheme(): def get_makefile_filename(): """Return the path of the Makefile.""" + + # GH-127429: When cross-compiling, use the Makefile from the target, instead of the host Python. + if cross_base := os.environ.get('_PYTHON_PROJECT_BASE'): + return os.path.join(cross_base, 'Makefile') + if _PYTHON_BUILD: return os.path.join(_PROJECT_BASE, "Makefile") + if hasattr(sys, 'abiflags'): config_dir_name = f'config-{_PY_VERSION_SHORT}{sys.abiflags}' else: config_dir_name = 'config' + if hasattr(sys.implementation, '_multiarch'): config_dir_name += f'-{sys.implementation._multiarch}' + return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') @@ -464,27 +472,44 @@ def get_path(name, scheme=get_default_scheme(), vars=None, expand=True): def _init_config_vars(): global _CONFIG_VARS _CONFIG_VARS = {} + + prefix = _PREFIX + exec_prefix = _EXEC_PREFIX + base_prefix = _BASE_PREFIX + base_exec_prefix = _BASE_EXEC_PREFIX + + try: + abiflags = sys.abiflags + except AttributeError: + abiflags = '' + + if os.name == 'posix': + _init_posix(_CONFIG_VARS) + # If we are cross-compiling, load the prefixes from the Makefile instead. + if '_PYTHON_PROJECT_BASE' in os.environ: + prefix = _CONFIG_VARS['prefix'] + exec_prefix = _CONFIG_VARS['exec_prefix'] + base_prefix = _CONFIG_VARS['prefix'] + base_exec_prefix = _CONFIG_VARS['exec_prefix'] + abiflags = _CONFIG_VARS['ABIFLAGS'] + # Normalized versions of prefix and exec_prefix are handy to have; # in fact, these are the standard versions used most places in the # Distutils. - _CONFIG_VARS['prefix'] = _PREFIX - _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX + _CONFIG_VARS['prefix'] = prefix + _CONFIG_VARS['exec_prefix'] = exec_prefix _CONFIG_VARS['py_version'] = _PY_VERSION _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT _CONFIG_VARS['py_version_nodot'] = _PY_VERSION_SHORT_NO_DOT - _CONFIG_VARS['installed_base'] = _BASE_PREFIX - _CONFIG_VARS['base'] = _PREFIX - _CONFIG_VARS['installed_platbase'] = _BASE_EXEC_PREFIX - _CONFIG_VARS['platbase'] = _EXEC_PREFIX + _CONFIG_VARS['installed_base'] = base_prefix + _CONFIG_VARS['base'] = prefix + _CONFIG_VARS['installed_platbase'] = base_exec_prefix + _CONFIG_VARS['platbase'] = exec_prefix _CONFIG_VARS['projectbase'] = _PROJECT_BASE _CONFIG_VARS['platlibdir'] = sys.platlibdir _CONFIG_VARS['implementation'] = _get_implementation() _CONFIG_VARS['implementation_lower'] = _get_implementation().lower() - try: - _CONFIG_VARS['abiflags'] = sys.abiflags - except AttributeError: - # sys.abiflags may not be defined on all platforms. 
- _CONFIG_VARS['abiflags'] = '' + _CONFIG_VARS['abiflags'] = abiflags try: _CONFIG_VARS['py_version_nodot_plat'] = sys.winver.replace('.', '') except AttributeError: @@ -493,8 +518,6 @@ def _init_config_vars(): if os.name == 'nt': _init_non_posix(_CONFIG_VARS) _CONFIG_VARS['VPATH'] = sys._vpath - if os.name == 'posix': - _init_posix(_CONFIG_VARS) if _HAS_USER_BASE: # Setting 'userbase' is done below the call to the # init function to enable using 'get_config_var' in diff --git a/Lib/sysconfig/__main__.py b/Lib/sysconfig/__main__.py index 5660a6c5105b9f..10728c709e1811 100644 --- a/Lib/sysconfig/__main__.py +++ b/Lib/sysconfig/__main__.py @@ -7,6 +7,7 @@ _PYTHON_BUILD, _get_sysconfigdata_name, get_config_h_filename, + get_config_var, get_config_vars, get_default_scheme, get_makefile_filename, @@ -161,7 +162,7 @@ def _print_config_dict(d, stream): def _get_pybuilddir(): pybuilddir = f'build/lib.{get_platform()}-{get_python_version()}' - if hasattr(sys, "gettotalrefcount"): + if get_config_var('Py_DEBUG') == '1': pybuilddir += '-pydebug' return pybuilddir @@ -229,11 +230,15 @@ def _generate_posix_vars(): f.write('build_time_vars = ') _print_config_dict(vars, stream=f) + print(f'Written {destfile}') + # Write a JSON file with the output of sysconfig.get_config_vars jsonfile = os.path.join(pybuilddir, _get_json_data_name()) with open(jsonfile, 'w') as f: json.dump(get_config_vars(), f, indent=2) + print(f'Written {jsonfile}') + # Create file used for sys.path fixup -- see Modules/getpath.c with open('pybuilddir.txt', 'w', encoding='utf8') as f: f.write(pybuilddir) diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index e783475cc7a36b..d0d1c8cdc9a11b 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -123,9 +123,9 @@ def get_pooled_int(value): xml_filename = 'refleak-xml.tmp' result = None dash_R_cleanup(fs, ps, pic, zdc, abcs) - support.gc_collect() for i in rep_range: + support.gc_collect() current = refleak_helper._hunting_for_refleaks refleak_helper._hunting_for_refleaks = True try: diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index f26411ace8fa73..55890e58ed4bae 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1260,27 +1260,6 @@ def test_super_instructions(self): got = self.get_disassembly(load_test, adaptive=True) self.do_disassembly_compare(got, dis_load_test_quickened_code) - @cpython_only - @requires_specialization - def test_binary_subscr_specialize(self): - binary_subscr_quicken = """\ - 0 RESUME_CHECK 0 - - 1 LOAD_NAME 0 (a) - LOAD_SMALL_INT 0 - %s - RETURN_VALUE -""" - co_list = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_list, {}, {'a': [0]})) - got = self.get_disassembly(co_list, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_LIST_INT") - - co_dict = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_dict, {}, {'a': {0: '1'}})) - got = self.get_disassembly(co_dict, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_DICT") - @cpython_only @requires_specialization def test_load_attr_specialize(self): diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 51463b6bb8c1b4..53f6d778bbecbc 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -124,6 +124,21 @@ def work(): for thread in threads: thread.join() + def test_object_class_change(self): + class Base: + def __init__(self): + 
self.attr = 123 + class ClassA(Base): + pass + class ClassB(Base): + pass + + obj = ClassA() + # keep reference to __dict__ + d = obj.__dict__ + obj.__class__ = ClassB + + def run_one(self, writer_func, reader_func): writer = Thread(target=writer_func) readers = [] diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 0372815b9bfd27..b5140057a69d36 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -31,6 +31,11 @@ def __new__(cls, *args, **kwargs): return C ContainerNoGC = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None + ### Support code ############################################################################### @@ -1130,6 +1135,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @requires_gil_enabled("Free threading does not support incremental GC") # Use small increments to emulate longer running process in a shorter time @gc_threshold(200, 10) @@ -1167,20 +1173,15 @@ def make_ll(depth): enabled = gc.isenabled() gc.enable() olds = [] + initial_heap_size = _testinternalcapi.get_tracked_heap_size() for i in range(20_000): newhead = make_ll(20) count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: - stats = gc.get_stats() - young = stats[0] - incremental = stats[1] - old = stats[2] - collected = young['collected'] + incremental['collected'] + old['collected'] - count += CORRECTION - live = count - collected - self.assertLess(live, 25000) + new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size + self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() @@ -1322,7 +1323,8 @@ def test_refcount_errors(self): from test.support import gc_collect, SuppressCrashReport a = [1, 2, 3] - b = [a] + b = [a, a] + a.append(b) # Avoid coredump when Py_FatalError() calls abort() SuppressCrashReport().__enter__() @@ -1332,6 +1334,8 @@ def test_refcount_errors(self): # (to avoid deallocating it): import ctypes ctypes.pythonapi.Py_DecRef(ctypes.py_object(a)) + del a + del b # The garbage collector should now have a fatal error # when it reaches the broken object @@ -1360,7 +1364,7 @@ def test_refcount_errors(self): self.assertRegex(stderr, br'object type name: list') self.assertRegex(stderr, - br'object repr : \[1, 2, 3\]') + br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]') class GCTogglingTests(unittest.TestCase): diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 0e39998ebb3783..c52b7f3e09bea1 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -1133,6 +1133,19 @@ def test_script_shadowing_stdlib_sys_path_modification(self): stdout, stderr = popen.communicate() self.assertRegex(stdout, expected_error) + def test_create_dynamic_null(self): + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec: + name = "a\x00b" + origin = "abc" + _imp.create_dynamic(Spec()) + + with self.assertRaisesRegex(ValueError, 'embedded null character'): + class Spec2: + name = "abc" + origin = "a\x00b" + _imp.create_dynamic(Spec2()) + @skip_if_dont_write_bytecode class FilePermissionTests(unittest.TestCase): diff --git a/Lib/test/test_interpreters/test_api.py b/Lib/test/test_interpreters/test_api.py index a9befbba64daa0..01856d9bf67657 100644 --- a/Lib/test/test_interpreters/test_api.py +++ b/Lib/test/test_interpreters/test_api.py @@ -1649,6 +1649,10 @@ def 
test_set___main___attrs(self): self.assertIs(after2, None) self.assertEqual(after3.type.__name__, 'AssertionError') + with self.assertRaises(ValueError): + # GH-127165: Embedded NULL characters broke the lookup + _interpreters.set___main___attrs(interpid, {"\x00": 1}) + with self.subTest('from C-API'): with self.interpreter_from_capi() as interpid: with self.assertRaisesRegex(InterpreterError, 'unrecognized'): diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 1a6eac236009c3..b989b21cd9b3a9 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -617,7 +617,7 @@ def write(items): opname = "BINARY_SUBSCR_GETITEM" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_binary_subscr_list_int(self): def get_items(): items = [] @@ -1023,7 +1023,7 @@ def write(items): opname = "STORE_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_store_subscr_list_int(self): def get_items(): items = [] @@ -1229,48 +1229,48 @@ class TestSpecializer(TestBase): @cpython_only @requires_specialization_ft def test_binary_op(self): - def f(): + def binary_op_add_int(): for _ in range(100): a, b = 1, 2 c = a + b self.assertEqual(c, 3) - f() - self.assert_specialized(f, "BINARY_OP_ADD_INT") - self.assert_no_opcode(f, "BINARY_OP") + binary_op_add_int() + self.assert_specialized(binary_op_add_int, "BINARY_OP_ADD_INT") + self.assert_no_opcode(binary_op_add_int, "BINARY_OP") - def g(): + def binary_op_add_unicode(): for _ in range(100): a, b = "foo", "bar" c = a + b self.assertEqual(c, "foobar") - g() - self.assert_specialized(g, "BINARY_OP_ADD_UNICODE") - self.assert_no_opcode(g, "BINARY_OP") + binary_op_add_unicode() + self.assert_specialized(binary_op_add_unicode, "BINARY_OP_ADD_UNICODE") + self.assert_no_opcode(binary_op_add_unicode, "BINARY_OP") @cpython_only @requires_specialization_ft def test_contain_op(self): - def f(): + def contains_op_dict(): for _ in range(100): a, b = 1, {1: 2, 2: 5} self.assertTrue(a in b) self.assertFalse(3 in b) - f() - self.assert_specialized(f, "CONTAINS_OP_DICT") - self.assert_no_opcode(f, "CONTAINS_OP") + contains_op_dict() + self.assert_specialized(contains_op_dict, "CONTAINS_OP_DICT") + self.assert_no_opcode(contains_op_dict, "CONTAINS_OP") - def g(): + def contains_op_set(): for _ in range(100): a, b = 1, {1, 2} self.assertTrue(a in b) self.assertFalse(3 in b) - g() - self.assert_specialized(g, "CONTAINS_OP_SET") - self.assert_no_opcode(g, "CONTAINS_OP") + contains_op_set() + self.assert_specialized(contains_op_set, "CONTAINS_OP_SET") + self.assert_no_opcode(contains_op_set, "CONTAINS_OP") @cpython_only @requires_specialization_ft @@ -1342,34 +1342,81 @@ def to_bool_str(): @cpython_only @requires_specialization_ft def test_unpack_sequence(self): - def f(): + def unpack_sequence_two_tuple(): for _ in range(100): a, b = 1, 2 self.assertEqual(a, 1) self.assertEqual(b, 2) - f() - self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE") - self.assert_no_opcode(f, "UNPACK_SEQUENCE") + unpack_sequence_two_tuple() + self.assert_specialized(unpack_sequence_two_tuple, + "UNPACK_SEQUENCE_TWO_TUPLE") + self.assert_no_opcode(unpack_sequence_two_tuple, "UNPACK_SEQUENCE") - def g(): + def unpack_sequence_tuple(): for _ in range(100): a, = 1, self.assertEqual(a, 1) - g() - self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE") - self.assert_no_opcode(g, "UNPACK_SEQUENCE") + unpack_sequence_tuple() + 
self.assert_specialized(unpack_sequence_tuple, "UNPACK_SEQUENCE_TUPLE") + self.assert_no_opcode(unpack_sequence_tuple, "UNPACK_SEQUENCE") - def x(): + def unpack_sequence_list(): for _ in range(100): a, b = [1, 2] self.assertEqual(a, 1) self.assertEqual(b, 2) - x() - self.assert_specialized(x, "UNPACK_SEQUENCE_LIST") - self.assert_no_opcode(x, "UNPACK_SEQUENCE") + unpack_sequence_list() + self.assert_specialized(unpack_sequence_list, "UNPACK_SEQUENCE_LIST") + self.assert_no_opcode(unpack_sequence_list, "UNPACK_SEQUENCE") + + @cpython_only + @requires_specialization_ft + def test_binary_subscr(self): + def binary_subscr_list_int(): + for _ in range(100): + a = [1, 2, 3] + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_list_int() + self.assert_specialized(binary_subscr_list_int, + "BINARY_SUBSCR_LIST_INT") + self.assert_no_opcode(binary_subscr_list_int, "BINARY_SUBSCR") + + def binary_subscr_tuple_int(): + for _ in range(100): + a = (1, 2, 3) + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_tuple_int() + self.assert_specialized(binary_subscr_tuple_int, + "BINARY_SUBSCR_TUPLE_INT") + self.assert_no_opcode(binary_subscr_tuple_int, "BINARY_SUBSCR") + + def binary_subscr_dict(): + for _ in range(100): + a = {1: 2, 2: 3} + self.assertEqual(a[1], 2) + self.assertEqual(a[2], 3) + + binary_subscr_dict() + self.assert_specialized(binary_subscr_dict, "BINARY_SUBSCR_DICT") + self.assert_no_opcode(binary_subscr_dict, "BINARY_SUBSCR") + + def binary_subscr_str_int(): + for _ in range(100): + a = "foobar" + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_str_int() + self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT") + self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 6a994f890da616..2c48eeeda145d0 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -546,12 +546,9 @@ def tempdir(self): self.addCleanup(os_helper.rmtree, d) return d - def test_matches_pathbase_api(self): - our_names = {name for name in dir(self.cls) if name[0] != '_'} - our_names.remove('is_reserved') # only present in PurePath + def test_matches_pathbase_docstrings(self): path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'} - self.assertEqual(our_names, path_names) - for attr_name in our_names: + for attr_name in path_names: if attr_name == 'parser': # On Windows, Path.parser is ntpath, but PathBase.parser is # posixpath, and so their docstrings differ. 
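The TestSpecializer cases above (renamed and extended in this patch) all follow the same pattern: run a small function enough times for the adaptive interpreter to warm up, then inspect the quickened bytecode. A rough standalone illustration of that idea, using only the public dis module; the helper below is invented for this note and assumes a CPython build with specialization enabled:

    import dis

    def check_specialized(func, expected_opname, warmup=100):
        # Warm up the adaptive counters so the interpreter can specialize.
        for _ in range(warmup):
            func()
        # adaptive=True disassembles the quickened (possibly specialized) code.
        names = {ins.opname for ins in dis.get_instructions(func, adaptive=True)}
        return expected_opname in names

    def add_ints():
        a, b = 1, 2
        return a + b

    print(check_specialized(add_ints, "BINARY_OP_ADD_INT"))  # True on such builds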
@@ -1357,6 +1354,17 @@ def test_symlink_to_unsupported(self):
             with self.assertRaises(pathlib.UnsupportedOperation):
                 q.symlink_to(p)
 
+    @needs_symlinks
+    def test_lstat(self):
+        p = self.cls(self.base)/ 'linkA'
+        st = p.stat()
+        self.assertNotEqual(st, p.lstat())
+
+    def test_lstat_nosymlink(self):
+        p = self.cls(self.base) / 'fileA'
+        st = p.stat()
+        self.assertEqual(st, p.lstat())
+
     def test_is_junction(self):
         P = self.cls(self.base)
 
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py
index aaa30a17f2af14..af94ac039808f0 100644
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -1351,7 +1351,6 @@ def test_unsupported_operation(self):
         p = self.cls('')
         e = UnsupportedOperation
         self.assertRaises(e, p.stat)
-        self.assertRaises(e, p.lstat)
         self.assertRaises(e, p.exists)
         self.assertRaises(e, p.samefile, 'foo')
         self.assertRaises(e, p.is_dir)
@@ -1372,9 +1371,7 @@ def test_unsupported_operation(self):
         self.assertRaises(e, p.rglob, '*')
         self.assertRaises(e, lambda: list(p.walk()))
         self.assertRaises(e, p.absolute)
-        self.assertRaises(e, P.cwd)
         self.assertRaises(e, p.expanduser)
-        self.assertRaises(e, p.home)
         self.assertRaises(e, p.readlink)
         self.assertRaises(e, p.symlink_to, 'foo')
         self.assertRaises(e, p.hardlink_to, 'foo')
@@ -2671,17 +2668,6 @@ def test_stat_no_follow_symlinks_nosymlink(self):
         st = p.stat()
         self.assertEqual(st, p.stat(follow_symlinks=False))
 
-    @needs_symlinks
-    def test_lstat(self):
-        p = self.cls(self.base)/ 'linkA'
-        st = p.stat()
-        self.assertNotEqual(st, p.lstat())
-
-    def test_lstat_nosymlink(self):
-        p = self.cls(self.base) / 'fileA'
-        st = p.stat()
-        self.assertEqual(st, p.lstat())
-
     def test_is_dir(self):
         P = self.cls(self.base)
         self.assertTrue((P / 'dirA').is_dir())
@@ -2868,11 +2854,13 @@ def test_delete_dir(self):
         base = self.cls(self.base)
         base.joinpath('dirA')._delete()
         self.assertRaises(FileNotFoundError, base.joinpath('dirA').stat)
-        self.assertRaises(FileNotFoundError, base.joinpath('dirA', 'linkC').lstat)
+        self.assertRaises(FileNotFoundError, base.joinpath('dirA', 'linkC').stat,
+                          follow_symlinks=False)
         base.joinpath('dirB')._delete()
         self.assertRaises(FileNotFoundError, base.joinpath('dirB').stat)
         self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'fileB').stat)
-        self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'linkD').lstat)
+        self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'linkD').stat,
+                          follow_symlinks=False)
         base.joinpath('dirC')._delete()
         self.assertRaises(FileNotFoundError, base.joinpath('dirC').stat)
         self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'dirD').stat)
diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py
index e5f9848319021a..48a4c568651879 100644
--- a/Lib/test/test_pdb.py
+++ b/Lib/test/test_pdb.py
@@ -2931,6 +2931,22 @@ def test_pdb_issue_gh_108976():
     (Pdb) continue
     """
 
+def test_pdb_issue_gh_127321():
+    """See GH-127321
+    breakpoint() should stop at an opcode that has a line number
+    >>> def test_function():
+    ...     import pdb; pdb_instance = pdb.Pdb(nosigint=True, readrc=False)
+    ...     [1, 2] and pdb_instance.set_trace()
+    ...     a = 1
+    >>> with PdbTestInput([ # doctest: +NORMALIZE_WHITESPACE
+    ...     'continue'
+    ... ]):
+    ...
test_function() + > (4)test_function() + -> a = 1 + (Pdb) continue + """ + def test_pdb_issue_gh_80731(): """See GH-80731 diff --git a/Lib/test/test_sqlite3/test_regression.py b/Lib/test/test_sqlite3/test_regression.py index db4e13222da9da..50cced3891d13a 100644 --- a/Lib/test/test_sqlite3/test_regression.py +++ b/Lib/test/test_sqlite3/test_regression.py @@ -433,6 +433,7 @@ def test_table_lock_cursor_dealloc(self): con.commit() cur = con.execute("select t from t") del cur + support.gc_collect() con.execute("drop table t") con.commit() @@ -448,6 +449,7 @@ def dup(v): con.create_function("dup", 1, dup) cur = con.execute("select dup(t) from t") del cur + support.gc_collect() con.execute("drop table t") con.commit() diff --git a/Lib/test/test_tkinter/test_misc.py b/Lib/test/test_tkinter/test_misc.py index 579ce2af9fa0bf..475edcbd5338a7 100644 --- a/Lib/test/test_tkinter/test_misc.py +++ b/Lib/test/test_tkinter/test_misc.py @@ -123,9 +123,9 @@ def test_tk_setPalette(self): def test_after(self): root = self.root - def callback(start=0, step=1): + def callback(start=0, step=1, *, end=0): nonlocal count - count = start + step + count = start + step + end # Without function, sleeps for ms. self.assertIsNone(root.after(1)) @@ -161,12 +161,18 @@ def callback(start=0, step=1): root.update() # Process all pending events. self.assertEqual(count, 53) + # Set up with callback with keyword args. + count = 0 + timer1 = root.after(0, callback, 42, step=11, end=1) + root.update() # Process all pending events. + self.assertEqual(count, 54) + def test_after_idle(self): root = self.root - def callback(start=0, step=1): + def callback(start=0, step=1, *, end=0): nonlocal count - count = start + step + count = start + step + end # Set up with callback with no args. count = 0 @@ -193,6 +199,12 @@ def callback(start=0, step=1): with self.assertRaises(tkinter.TclError): root.tk.call(script) + # Set up with callback with keyword args. + count = 0 + idle1 = root.after_idle(callback, 42, step=11, end=1) + root.update() # Process all pending events. + self.assertEqual(count, 54) + def test_after_cancel(self): root = self.root diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py index dd7b3e138f4236..bfec04bb6c1e6e 100644 --- a/Lib/tkinter/__init__.py +++ b/Lib/tkinter/__init__.py @@ -847,7 +847,7 @@ def tk_focusPrev(self): if not name: return None return self._nametowidget(name) - def after(self, ms, func=None, *args): + def after(self, ms, func=None, *args, **kw): """Call function once after given time. MS specifies the time in milliseconds. FUNC gives the @@ -861,7 +861,7 @@ def after(self, ms, func=None, *args): else: def callit(): try: - func(*args) + func(*args, **kw) finally: try: self.deletecommand(name) @@ -875,13 +875,13 @@ def callit(): name = self._register(callit) return self.tk.call('after', ms, name) - def after_idle(self, func, *args): + def after_idle(self, func, *args, **kw): """Call FUNC once if the Tcl main loop has no event to process. Return an identifier to cancel the scheduling with after_cancel.""" - return self.after('idle', func, *args) + return self.after('idle', func, *args, **kw) def after_cancel(self, id): """Cancel scheduling of function identified with ID. 
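As a usage sketch of the keyword-argument forwarding added to after() and after_idle() above (illustrative only; it needs a running Tk display, and the callback is made up for the example):

    import tkinter as tk

    root = tk.Tk()

    def callback(start=0, step=1, *, end=0):
        print(start + step + end)

    # Positional arguments worked before; keyword arguments are now forwarded too.
    root.after(100, callback, 42, step=11, end=1)   # prints 54 after ~100 ms
    root.after_idle(callback, 1, end=2)             # prints 3 once the loop is idle
    root.after(200, root.destroy)
    root.mainloop()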
diff --git a/Misc/ACKS b/Misc/ACKS index cd34846574b304..fc4b83a0e2b823 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -2079,6 +2079,7 @@ Arnon Yaari Alakshendra Yadav Hirokazu Yamamoto Masayuki Yamamoto +Zhikang Yan Jingchen Ye Ka-Ping Yee Chi Hsuan Yen diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst new file mode 100644 index 00000000000000..9ef2b8dc33ed0f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst @@ -0,0 +1,4 @@ +Add a marking phase to the GC. All objects that can be transitively reached +from builtin modules or the stacks are marked as reachable before cycle +detection. This reduces the amount of work done by the GC by approximately +half. diff --git a/Misc/NEWS.d/next/Library/2024-11-16-10-52-48.gh-issue-126899.GFnfBt.rst b/Misc/NEWS.d/next/Library/2024-11-16-10-52-48.gh-issue-126899.GFnfBt.rst new file mode 100644 index 00000000000000..c1a0ed6438135d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-16-10-52-48.gh-issue-126899.GFnfBt.rst @@ -0,0 +1,2 @@ +Make tkinter widget methods :meth:`!after` and :meth:`!after_idle` accept +arguments passed by keyword. diff --git a/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst b/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst new file mode 100644 index 00000000000000..708c1a6437d812 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst @@ -0,0 +1,3 @@ +Fixed bug where, on cross-builds, the :mod:`sysconfig` POSIX data was being +generated with the host Python's ``Makefile``. The data is now generated from +current build's ``Makefile``. diff --git a/Misc/NEWS.d/next/Library/2024-11-30-21-46-15.gh-issue-127321.M78fBv.rst b/Misc/NEWS.d/next/Library/2024-11-30-21-46-15.gh-issue-127321.M78fBv.rst new file mode 100644 index 00000000000000..69b6ce68a47509 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-30-21-46-15.gh-issue-127321.M78fBv.rst @@ -0,0 +1 @@ +:func:`pdb.set_trace` will not stop at an opcode that does not have an associated line number anymore. diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index a925191d479bd6..1bb71a3e80b39d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2076,6 +2076,11 @@ has_deferred_refcount(PyObject *self, PyObject *op) return PyBool_FromLong(_PyObject_HasDeferredRefcount(op)); } +static PyObject * +get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); +} static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, @@ -2174,6 +2179,7 @@ static PyMethodDef module_functions[] = { {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS}, {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, {"has_deferred_refcount", has_deferred_refcount, METH_O}, + {"get_tracked_heap_size", get_tracked_heap_size, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 49b213eaa817e2..a13d8084d14d66 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7300,7 +7300,7 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) // We could be called with an unlocked dict when the caller knows the // values are already detached, so we assert after inline values check. 
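    /* ASSERT_WORLD_STOPPED_OR_OBJ_LOCKED accepts either a caller that holds
       mp's per-object lock or one running while every other thread is stopped,
       instead of requiring the critical section to be held unconditionally. */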
- _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(mp); + ASSERT_WORLD_STOPPED_OR_OBJ_LOCKED(mp); assert(mp->ma_values->embedded == 1); assert(mp->ma_values->valid == 1); assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES); diff --git a/Objects/listobject.c b/Objects/listobject.c index bb0040cbe9f272..4b24f4a428e18b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -391,6 +391,12 @@ PyList_GetItemRef(PyObject *op, Py_ssize_t i) return item; } +PyObject * +_PyList_GetItemRef(PyListObject *list, Py_ssize_t i) +{ + return list_get_item_ref(list, i); +} + int PyList_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ab4f07ed054385..463da06445984b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5014,21 +5014,26 @@ ctz(size_t v) #endif /* SIZEOF_SIZE_T */ return pos; } +#else +#define HAVE_CTZ 0 #endif -#if HAVE_CTZ -// load p[0]..p[size-1] as a little-endian size_t -// without unaligned access nor read ahead. +#if HAVE_CTZ && PY_LITTLE_ENDIAN +// load p[0]..p[size-1] as a size_t without unaligned access nor read ahead. static size_t load_unaligned(const unsigned char *p, size_t size) { - assert(size <= SIZEOF_SIZE_T); union { size_t s; unsigned char b[SIZEOF_SIZE_T]; } u; u.s = 0; + // This switch statement assumes little endian because: + // * union is faster than bitwise or and shift. + // * big endian machine is rare and hard to maintain. switch (size) { + default: +#if SIZEOF_SIZE_T == 8 case 8: u.b[7] = p[7]; _Py_FALLTHROUGH; @@ -5041,6 +5046,7 @@ load_unaligned(const unsigned char *p, size_t size) case 5: u.b[4] = p[4]; _Py_FALLTHROUGH; +#endif case 4: u.b[3] = p[3]; _Py_FALLTHROUGH; @@ -5055,8 +5061,6 @@ load_unaligned(const unsigned char *p, size_t size) break; case 0: break; - default: - Py_UNREACHABLE(); } return u.s; } @@ -5077,8 +5081,8 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) if (end - start >= SIZEOF_SIZE_T) { const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); +#if PY_LITTLE_ENDIAN && HAVE_CTZ if (p < p2) { -#if HAVE_CTZ #if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) // x86 and amd64 are little endian and can load unaligned memory. size_t u = *(const size_t*)p & ASCII_CHAR_MASK; @@ -5086,11 +5090,11 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK; #endif if (u) { - return p - start + (ctz(u) - 7) / 8; + return (ctz(u) - 7) / 8; } p = p2; } -#else +#else /* PY_LITTLE_ENDIAN && HAVE_CTZ */ while (p < p2) { if (*p & 0x80) { return p - start; @@ -5113,7 +5117,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) p += SIZEOF_SIZE_T; } } -#if HAVE_CTZ +#if PY_LITTLE_ENDIAN && HAVE_CTZ // we can not use *(const size_t*)p to avoid buffer overrun. 
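    // load_unaligned() assembles the remaining end - p bytes (at most
    // SIZEOF_SIZE_T) into a size_t one byte at a time, so the ASCII_CHAR_MASK
    // test below covers the final partial word without reading past 'end'.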
size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK; if (u) { @@ -16154,7 +16158,7 @@ encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) int res; res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT); if (res == -2) { - PyErr_Format(PyExc_RuntimeWarning, "cannot encode %s", name); + PyErr_Format(PyExc_RuntimeError, "cannot encode %s", name); return -1; } if (res < 0) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a14b32b8108be8..c07ec42ec68f8b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -704,7 +704,7 @@ dummy_func( }; specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -713,7 +713,7 @@ dummy_func( } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_BINARY_SUBSCR, (container, sub -- res)) { @@ -790,11 +790,17 @@ dummy_func( // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub)); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; +#ifdef Py_GIL_DISABLED + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + DEOPT_IF(res_o == NULL); + STAT_INC(BINARY_SUBSCR, hit); +#else DEOPT_IF(index >= PyList_GET_SIZE(list)); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); +#endif PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); diff --git a/Python/ceval.c b/Python/ceval.c index eba0f233a81ef3..f9514a6bf25c1b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -818,6 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; entry_frame.stackpointer = entry_frame.localsplus; entry_frame.owner = FRAME_OWNED_BY_CSTACK; + entry_frame.visited = 0; entry_frame.return_offset = 0; /* Push frame */ entry_frame.previous = tstate->current_frame; diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 7aaa045f375cf0..0a106ad636bfe8 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -342,6 +342,11 @@ _copy_string_obj_raw(PyObject *strobj, Py_ssize_t *p_size) return NULL; } + if (size != (Py_ssize_t)strlen(str)) { + PyErr_SetString(PyExc_ValueError, "found embedded NULL character"); + return NULL; + } + char *copied = PyMem_RawMalloc(size+1); if (copied == NULL) { PyErr_NoMemory(); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d46412a193332b..c91257b06cad11 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -981,6 +981,16 @@ JUMP_TO_JUMP_TARGET(); } Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_SUBSCR, hit); + #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -989,6 +999,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif 
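    /* In the Py_GIL_DISABLED branch above, _PyList_GetItemRef() returns a new
       (strong) reference, or NULL if the item cannot be retrieved (typically an
       out-of-range index), so the NULL check subsumes the explicit bounds check
       used in the default build. */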
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/gc.c b/Python/gc.c index 63adecf0e05114..5b9588c8741b97 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -106,7 +106,7 @@ gc_old_space(PyGC_Head *g) } static inline int -flip_old_space(int space) +other_space(int space) { assert(space == 0 || space == 1); return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1; @@ -430,24 +430,32 @@ validate_list(PyGC_Head *head, enum flagstates flags) #endif #ifdef GC_EXTRA_DEBUG + + static void -validate_old(GCState *gcstate) +gc_list_validate_space(PyGC_Head *head, int space) { + PyGC_Head *gc = GC_NEXT(head); + while (gc != head) { + assert(gc_old_space(gc) == space); + gc = GC_NEXT(gc); + } +} + +static void +validate_spaces(GCState *gcstate) { + int visited = gcstate->visited_space; + int not_visited = other_space(visited); + gc_list_validate_space(&gcstate->young.head, not_visited); for (int space = 0; space < 2; space++) { - PyGC_Head *head = &gcstate->old[space].head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *next = GC_NEXT(gc); - assert(gc_old_space(gc) == space); - gc = next; - } + gc_list_validate_space(&gcstate->old[space].head, space); } + gc_list_validate_space(&gcstate->permanent_generation.head, visited); } static void validate_consistent_old_space(PyGC_Head *head) { - PyGC_Head *prev = head; PyGC_Head *gc = GC_NEXT(head); if (gc == head) { return; @@ -457,23 +465,13 @@ validate_consistent_old_space(PyGC_Head *head) PyGC_Head *truenext = GC_NEXT(gc); assert(truenext != NULL); assert(gc_old_space(gc) == old_space); - prev = gc; gc = truenext; } - assert(prev == GC_PREV(head)); } -static void -gc_list_validate_space(PyGC_Head *head, int space) { - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - assert(gc_old_space(gc) == space); - gc = GC_NEXT(gc); - } -} #else -#define validate_old(g) do{}while(0) +#define validate_spaces(g) do{}while(0) #define validate_consistent_old_space(l) do{}while(0) #define gc_list_validate_space(l, s) do{}while(0) #endif @@ -494,7 +492,7 @@ update_refs(PyGC_Head *containers) next = GC_NEXT(gc); PyObject *op = FROM_GC(gc); if (_Py_IsImmortal(op)) { - gc_list_move(gc, &get_gc_state()->permanent_generation.head); + _PyObject_GC_UNTRACK(op); gc = next; continue; } @@ -733,13 +731,25 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) unreachable->_gc_next &= _PyGC_PREV_MASK; } +/* In theory, all tuples should be younger than the +* objects they refer to, as tuples are immortal. +* Therefore, untracking tuples in oldest-first order in the +* young generation before promoting them should have tracked +* all the tuples that can be untracked. +* +* Unfortunately, the C API allows tuples to be created +* and then filled in. So this won't untrack all tuples +* that can be untracked. It should untrack most of them +* and is much faster than a more complex approach that +* would untrack all relevant tuples. 
+*/ static void untrack_tuples(PyGC_Head *head) { - PyGC_Head *next, *gc = GC_NEXT(head); + PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); + PyGC_Head *next = GC_NEXT(gc); if (PyTuple_CheckExact(op)) { _PyTuple_MaybeUntrack(op); } @@ -1293,8 +1303,10 @@ gc_collect_young(PyThreadState *tstate, struct gc_collection_stats *stats) { GCState *gcstate = &tstate->interp->gc; + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); GC_STAT_ADD(0, collections, 1); #ifdef Py_STATS { @@ -1308,39 +1320,21 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); + gc_list_set_space(young, gcstate->visited_space); gc_collect_region(tstate, young, &survivors, stats); - Py_ssize_t survivor_count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - survivor_count = gc_list_set_space(&survivors, 1); - } - else { - PyGC_Head *gc; - for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) { -#ifdef GC_DEBUG - assert(gc_old_space(gc) == 0); -#endif - survivor_count++; - } - } - (void)survivor_count; // Silence compiler warning gc_list_merge(&survivors, visited); - validate_old(gcstate); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[gcstate->visited_space].count++; - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); + validate_spaces(gcstate); } #ifndef NDEBUG static inline int IS_IN_VISITED(PyGC_Head *gc, int visited_space) { - assert(visited_space == 0 || flip_old_space(visited_space) == 0); + assert(visited_space == 0 || other_space(visited_space) == 0); return gc_old_space(gc) == visited_space; } #endif @@ -1348,7 +1342,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; - uintptr_t size; + intptr_t size; }; /* A traversal callback for adding to container) */ @@ -1371,7 +1365,7 @@ visit_add_to_container(PyObject *op, void *arg) return 0; } -static uintptr_t +static intptr_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { struct container_and_flag arg = { @@ -1385,6 +1379,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat * have been marked as visited */ assert(IS_IN_VISITED(gc, gcstate->visited_space)); PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); if (_Py_IsImmortal(op)) { PyGC_Head *next = GC_NEXT(gc); gc_list_move(gc, &get_gc_state()->permanent_generation.head); @@ -1402,22 +1397,191 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat /* Do bookkeeping for a completed GC cycle */ static void -completed_cycle(GCState *gcstate) -{ -#ifdef Py_DEBUG - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); -#endif - gcstate->visited_space = flip_old_space(gcstate->visited_space); - /* Make sure all young objects have old space bit set correctly */ - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *gc = GC_NEXT(young); - while (gc != young) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, gcstate->visited_space); - gc = next; +completed_scavenge(GCState *gcstate) +{ + /* We must observe two invariants: + * 1. 
Members of the permanent generation must be marked visited. + * 2. We cannot touch members of the permanent generation. */ + int visited; + if (gc_list_is_empty(&gcstate->permanent_generation.head)) { + /* Permanent generation is empty so we can flip spaces bit */ + int not_visited = gcstate->visited_space; + visited = other_space(not_visited); + gcstate->visited_space = visited; + /* Make sure all objects have visited bit set correctly */ + gc_list_set_space(&gcstate->young.head, not_visited); } + else { + /* We must move the objects from visited to pending space. */ + visited = gcstate->visited_space; + int not_visited = other_space(visited); + assert(gc_list_is_empty(&gcstate->old[not_visited].head)); + gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head); + gc_list_set_space(&gcstate->old[not_visited].head, not_visited); + } + assert(gc_list_is_empty(&gcstate->old[visited].head)); gcstate->work_to_do = 0; + gcstate->phase = GC_PHASE_MARK; +} + +static intptr_t +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; + } + } + return 0; +} + +static intptr_t +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +{ + // Transitively traverse all objects from reachable, until empty + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); + assert(gc_old_space(gc) == visited_space); + gc_list_move(gc, visited); + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + } + gc_list_validate_space(visited, visited_space); + return arg.size; +} + +static intptr_t +mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + // Move all objects on stacks to reachable + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyThreadState* ts = PyInterpreterState_ThreadHead(interp); + HEAD_UNLOCK(runtime); + while (ts) { + _PyInterpreterFrame *frame = ts->current_frame; + while (frame) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + frame = frame->previous; + continue; + } + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } + if (!start && frame->visited) { + // If this frame has already been visited, then the lower frames + // will have already been visited and will not have changed + break; + } + frame->visited = 1; + frame = frame->previous; + } + HEAD_LOCK(runtime); + ts = PyThreadState_Next(ts); + HEAD_UNLOCK(runtime); + 
} + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + struct types_state *types = &interp->types; + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + } + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + } + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_at_start(PyThreadState *tstate) +{ + // TO DO -- Make this incremental + GCState *gcstate = &tstate->interp->gc; + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); + objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + gcstate->work_to_do -= objects_marked; + gcstate->phase = GC_PHASE_COLLECT; + validate_spaces(gcstate); + return objects_marked; +} + +static intptr_t +assess_work_to_do(GCState *gcstate) +{ + /* The amount of work we want to do depends on three things. + * 1. The number of new objects created + * 2. The growth in heap size since the last collection + * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. 
+ */ + intptr_t scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t new_objects = gcstate->young.count; + intptr_t max_heap_fraction = new_objects*3/2; + intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > max_heap_fraction) { + heap_fraction = max_heap_fraction; + } + gcstate->young.count = 0; + return new_objects + heap_fraction; } static void @@ -1425,18 +1589,30 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; + gcstate->work_to_do += assess_work_to_do(gcstate); + untrack_tuples(&gcstate->young.head); + if (gcstate->phase == GC_PHASE_MARK) { + Py_ssize_t objects_marked = mark_at_start(tstate); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + validate_spaces(gcstate); + return; + } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; gc_list_init(&increment); - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } + int scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = 0; + Py_ssize_t increment_size = gc_list_size(&increment); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1444,54 +1620,56 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *gc = _PyGCHead_NEXT(not_visited); gc_list_move(gc, &increment); increment_size++; + assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } + GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, stats); - gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); + completed_scavenge(gcstate); } + validate_spaces(gcstate); } - static void gc_collect_full(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(2, collections, 1); GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); /* merge all generations into visited */ - gc_list_validate_space(young, gcstate->visited_space); - 
gc_list_set_space(pending, gcstate->visited_space); gc_list_merge(young, pending); + gc_list_validate_space(pending, 1-gcstate->visited_space); + gc_list_set_space(pending, gcstate->visited_space); gcstate->young.count = 0; gc_list_merge(pending, visited); + validate_spaces(gcstate); gc_collect_region(tstate, visited, visited, stats); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; - - gcstate->work_to_do = - gcstate->young.threshold * 2; + completed_scavenge(gcstate); _PyGC_ClearAllFreeLists(tstate->interp); - validate_old(gcstate); + validate_spaces(gcstate); add_stats(gcstate, 2, stats); } @@ -1733,20 +1911,23 @@ void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space) { - gc_list_set_space(&gcstate->young.head, 0); - } + /* The permanent_generation must be visited */ + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head; PyGC_Head*old1 = &gcstate->old[1].head; + if (gcstate->visited_space) { + gc_list_set_space(old0, 1); + } + else { + gc_list_set_space(old1, 0); + } gc_list_merge(old0, &gcstate->permanent_generation.head); gcstate->old[0].count = 0; - gc_list_set_space(old1, 0); gc_list_merge(old1, &gcstate->permanent_generation.head); gcstate->old[1].count = 0; - validate_old(gcstate); + validate_spaces(gcstate); } void @@ -1754,8 +1935,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - &gcstate->old[0].head); - validate_old(gcstate); + &gcstate->old[gcstate->visited_space].head); + validate_spaces(gcstate); } Py_ssize_t @@ -1860,7 +2041,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) _Py_stats->object_stats.object_visits = 0; } #endif - validate_old(gcstate); + validate_spaces(gcstate); _Py_atomic_store_int(&gcstate->collecting, 0); return stats.uncollectable + stats.collected; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c9a5132269398c..45bcc4242af9d7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -433,7 +433,7 @@ container = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -444,7 +444,7 @@ } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _BINARY_SUBSCR { @@ -577,11 +577,19 @@ // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(res_o == NULL, BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); + #else DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif PyStackRef_CLOSE_SPECIALIZED(sub_st, 
(destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/import.c b/Python/import.c index 09fe95fa1fb647..b3c384c27718ce 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1157,12 +1157,14 @@ del_extensions_cache_value(struct extensions_cache_value *value) static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { - Py_ssize_t str1_len, str2_len; - const char *str1_data = PyUnicode_AsUTF8AndSize(str1, &str1_len); - const char *str2_data = PyUnicode_AsUTF8AndSize(str2, &str2_len); + const char *str1_data = _PyUnicode_AsUTF8NoNUL(str1); + const char *str2_data = _PyUnicode_AsUTF8NoNUL(str2); if (str1_data == NULL || str2_data == NULL) { return NULL; } + Py_ssize_t str1_len = strlen(str1_data); + Py_ssize_t str2_len = strlen(str2_data); + /* Make sure sep and the NULL byte won't cause an overflow. */ assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; diff --git a/Python/specialize.c b/Python/specialize.c index 172dae7d374602..504eef4f448429 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -231,6 +231,8 @@ print_gc_stats(FILE *out, GCStats *stats) fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); + fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable); + fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable); } } @@ -1717,15 +1719,15 @@ _Py_Specialize_BinarySubscr( PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); PyObject *sub = PyStackRef_AsPyObjectBorrow(sub_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[BINARY_SUBSCR] == INLINE_CACHE_ENTRIES_BINARY_SUBSCR); - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1); PyTypeObject *container_type = Py_TYPE(container); + uint8_t specialized_op; if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_LIST_INT; + specialized_op = BINARY_SUBSCR_LIST_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1738,7 +1740,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyTuple_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_TUPLE_INT; + specialized_op = BINARY_SUBSCR_TUPLE_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1751,7 +1753,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyUnicode_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_STR_INT; + specialized_op = BINARY_SUBSCR_STR_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1762,9 +1764,10 @@ _Py_Specialize_BinarySubscr( goto fail; } if (container_type == &PyDict_Type) { - instr->op.code = BINARY_SUBSCR_DICT; + specialized_op = BINARY_SUBSCR_DICT; goto success; } +#ifndef Py_GIL_DISABLED PyTypeObject *cls = Py_TYPE(container); PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { @@ -1797,21 +1800,17 @@ 
_Py_Specialize_BinarySubscr(
         // struct _specialization_cache):
         ht->_spec_cache.getitem = descriptor;
         ht->_spec_cache.getitem_version = version;
-        instr->op.code = BINARY_SUBSCR_GETITEM;
+        specialized_op = BINARY_SUBSCR_GETITEM;
         goto success;
     }
+#endif // Py_GIL_DISABLED
     SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub));
 fail:
-    STAT_INC(BINARY_SUBSCR, failure);
-    assert(!PyErr_Occurred());
-    instr->op.code = BINARY_SUBSCR;
-    cache->counter = adaptive_counter_backoff(cache->counter);
+    unspecialize(instr);
     return;
 success:
-    STAT_INC(BINARY_SUBSCR, success);
-    assert(!PyErr_Occurred());
-    cache->counter = adaptive_counter_cooldown();
+    specialize(instr, specialized_op);
 }
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 5793e5c649d6b3..bc7ccfe33e777d 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -483,7 +483,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]:
         ): (trace_too_long, attempts),
         Doc(
             "Trace too short",
-            "A potential trace is abandoced because it it too short.",
+            "A potential trace is abandoned because it is too short.",
         ): (trace_too_short, attempts),
         Doc(
             "Inner loop found", "A trace is truncated because it has an inner loop"
@@ -1118,6 +1118,8 @@ def calc_gc_stats(stats: Stats) -> Rows:
             Count(gen["collections"]),
             Count(gen["objects collected"]),
             Count(gen["object visits"]),
+            Count(gen["objects reachable from roots"]),
+            Count(gen["objects not reachable from roots"]),
         )
         for (i, gen) in enumerate(gc_stats)
     ]
@@ -1127,7 +1129,8 @@ def calc_gc_stats(stats: Stats) -> Rows:
         "GC collections and effectiveness",
         [
             Table(
-                ("Generation:", "Collections:", "Objects collected:", "Object visits:"),
+                ("Generation:", "Collections:", "Objects collected:", "Object visits:",
+                 "Reachable from roots:", "Not reachable from roots:"),
                 calc_gc_stats,
             )
         ],
diff --git a/configure b/configure
index 84b74ac3584bcd..4e4043260ed2df 100755
--- a/configure
+++ b/configure
@@ -944,8 +944,8 @@ AR
 LINK_PYTHON_OBJS
 LINK_PYTHON_DEPS
 LIBRARY_DEPS
-NODE
 HOSTRUNNER
+NODE
 STATIC_LIBPYTHON
 GNULD
 EXPORTSFROM
@@ -1147,7 +1147,6 @@ LDFLAGS
 LIBS
 CPPFLAGS
 CPP
-HOSTRUNNER
 PROFILE_TASK
 BOLT_INSTRUMENT_FLAGS
 BOLT_APPLY_FLAGS
@@ -1968,7 +1967,6 @@ Some influential environment variables:
   CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I if you have
               headers in a nonstandard directory
   CPP         C preprocessor
-  HOSTRUNNER  Program to run CPython for the host platform
   PROFILE_TASK
               Python args for PGO generation task
   BOLT_INSTRUMENT_FLAGS
@@ -7622,9 +7620,9 @@ if test "$cross_compiling" = yes; then
     RUNSHARED=
 fi
 
+# HOSTRUNNER - Program to run CPython for the host platform
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking HOSTRUNNER" >&5
 printf %s "checking HOSTRUNNER... " >&6; }
-
 if test -z "$HOSTRUNNER"
 then
     case $ac_sys_system in #(
diff --git a/configure.ac b/configure.ac
index 8fa6cb60900ad1..4cfced10432491 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1609,8 +1609,8 @@ if test "$cross_compiling" = yes; then
     RUNSHARED=
 fi
 
+# HOSTRUNNER - Program to run CPython for the host platform
 AC_MSG_CHECKING([HOSTRUNNER])
-AC_ARG_VAR([HOSTRUNNER], [Program to run CPython for the host platform])
 if test -z "$HOSTRUNNER"
 then
     AS_CASE([$ac_sys_system],
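Looping back to the _testinternalcapi.get_tracked_heap_size() helper added earlier in this patch, a minimal sketch of the kind of check the updated test_gc performs (illustrative only; _testinternalcapi is a private test module that is not present in every build):

    import gc

    try:
        import _testinternalcapi
    except ImportError:
        _testinternalcapi = None

    if _testinternalcapi is not None:
        gc.collect()
        before = _testinternalcapi.get_tracked_heap_size()
        cycles = [[i] for i in range(10_000)]   # GC-tracked objects
        cycles.append(cycles)                   # make the structure cyclic
        del cycles
        gc.collect()
        after = _testinternalcapi.get_tracked_heap_size()
        # After collection the tracked heap should be back near its starting size.
        print("tracked heap delta:", after - before)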