From 8b7c194c7bf7e547e4f6317528f0dcb9344c18c7 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 6 Dec 2024 13:28:32 +0300 Subject: [PATCH 01/55] gh-120010: Fix invalid (nan+nanj) results in _Py_c_prod() (GH-120287) In some cases, previously computed as (nan+nanj), we could recover meaningful component values in the result, see e.g. the C11, Annex G.5.1, routine _Cmultd(): >>> z = 1e300+1j >>> z*(nan+infj) # was (nan+nanj) (-inf+infj) That also fix some complex powers for small integer exponents, computed with optimized algorithm (by squaring): >>> z**5 # was (nan+nanj) Traceback (most recent call last): File "", line 1, in z**5 ~^^~ OverflowError: complex exponentiation --- Lib/test/test_complex.py | 17 ++++++ ...-06-04-08-26-25.gh-issue-120010._z-AWz.rst | 2 + Objects/complexobject.c | 60 +++++++++++++++++-- 3 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py index 179556f57e884f..fd002fb00ac338 100644 --- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -299,6 +299,22 @@ def test_mul(self): self.assertRaises(TypeError, operator.mul, 1j, None) self.assertRaises(TypeError, operator.mul, None, 1j) + for z, w, r in [(1e300+1j, complex(INF, INF), complex(NAN, INF)), + (1e300+1j, complex(NAN, INF), complex(-INF, INF)), + (1e300+1j, complex(INF, NAN), complex(INF, INF)), + (complex(INF, 1), complex(NAN, INF), complex(NAN, INF)), + (complex(INF, 1), complex(INF, NAN), complex(INF, NAN)), + (complex(NAN, 1), complex(1, INF), complex(-INF, NAN)), + (complex(1, NAN), complex(1, INF), complex(NAN, INF)), + (complex(1e200, NAN), complex(1e200, NAN), complex(INF, NAN)), + (complex(1e200, NAN), complex(NAN, 1e200), complex(NAN, INF)), + (complex(NAN, 1e200), complex(1e200, NAN), complex(NAN, INF)), + (complex(NAN, 1e200), complex(NAN, 1e200), complex(-INF, NAN)), + (complex(NAN, NAN), complex(NAN, NAN), complex(NAN, NAN))]: + with self.subTest(z=z, w=w, r=r): + self.assertComplexesAreIdentical(z * w, r) + self.assertComplexesAreIdentical(w * z, r) + def test_mod(self): # % is no longer supported on complex numbers with self.assertRaises(TypeError): @@ -340,6 +356,7 @@ def test_pow(self): self.assertAlmostEqual(pow(1j, 200), 1) self.assertRaises(ValueError, pow, 1+1j, 1+1j, 1+1j) self.assertRaises(OverflowError, pow, 1e200+1j, 1e200+1j) + self.assertRaises(OverflowError, pow, 1e200+1j, 5) self.assertRaises(TypeError, pow, 1j, None) self.assertRaises(TypeError, pow, None, 1j) self.assertAlmostEqual(pow(1j, 0.5), 0.7071067811865476+0.7071067811865475j) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst new file mode 100644 index 00000000000000..7954c7f5927397 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst @@ -0,0 +1,2 @@ +Correct invalid corner cases which resulted in ``(nan+nanj)`` output in complex +multiplication, e.g., ``(1e300+1j)*(nan+infj)``. Patch by Sergey B Kirpichev. diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 8fbca3cb02d80a..bf6187efac941f 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -85,11 +85,63 @@ _Py_c_neg(Py_complex a) } Py_complex -_Py_c_prod(Py_complex a, Py_complex b) +_Py_c_prod(Py_complex z, Py_complex w) { - Py_complex r; - r.real = a.real*b.real - a.imag*b.imag; - r.imag = a.real*b.imag + a.imag*b.real; + double a = z.real, b = z.imag, c = w.real, d = w.imag; + double ac = a*c, bd = b*d, ad = a*d, bc = b*c; + Py_complex r = {ac - bd, ad + bc}; + + /* Recover infinities that computed as nan+nanj. See e.g. the C11, + Annex G.5.1, routine _Cmultd(). */ + if (isnan(r.real) && isnan(r.imag)) { + int recalc = 0; + + if (isinf(a) || isinf(b)) { /* z is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + a = copysign(isinf(a) ? 1.0 : 0.0, a); + b = copysign(isinf(b) ? 1.0 : 0.0, b); + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (isinf(c) || isinf(d)) { /* w is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + c = copysign(isinf(c) ? 1.0 : 0.0, c); + d = copysign(isinf(d) ? 1.0 : 0.0, d); + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + recalc = 1; + } + if (!recalc && (isinf(ac) || isinf(bd) || isinf(ad) || isinf(bc))) { + /* Recover infinities from overflow by changing nans to 0 */ + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (recalc) { + r.real = Py_INFINITY*(a*c - b*d); + r.imag = Py_INFINITY*(a*d + b*c); + } + } + return r; } From 023b7d2141467017abc27de864f3f44677768cb3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 10:46:59 +0000 Subject: [PATCH 02/55] GH-126491: Lower heap size limit with faster marking (GH-127519) * Faster marking of reachable objects * Changes calculation of work to do and work done. * Merges transitive closure calculations --- InternalDocs/garbage_collector.md | 50 ++++- Lib/test/test_gc.py | 14 +- Objects/dictobject.c | 4 +- Objects/genobject.c | 69 +------ Objects/typeobject.c | 13 ++ Python/gc.c | 301 ++++++++++++++---------------- 6 files changed, 208 insertions(+), 243 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 08db080a200ea4..4761f78f3593e3 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... def __init__(self, next_link=None): ... self.next_link = next_link -... +... >>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -515,6 +515,44 @@ increment. All objects directly referred to from those stack frames are added to the working set. Then the above algorithm is repeated, starting from step 2. +Determining how much work to do +------------------------------- + +We need to do a certain amount of work to enusre that garbage is collected, +but doing too much work slows down execution. + +To work out how much work we need to do, consider a heap with `L` live objects +and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects +at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and +we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. +For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which +`G1` garbage objects are created. + +The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, +assuming that the number of live objects remains roughly constant. +If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` +For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio. + +In other words, to keep the garbage fraction to 1/3 or less we need to visit +4 times as many objects as are newly created. + +We can do better than this though. Not all new objects will be garbage. +Consider the heap at the end of the scavenge with `L1` live objects and `G1` +garbage. Also, note that `T == M + I` where `M` is the number of objects marked +as reachable and `I` is the number of objects visited in increments. +Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. + +If we choose the amount of work done such that `2*M + I == 6N` then we can do +less work in most cases, but are still guaranteed to keep up. +Since `I ≳ G0 + G1` (not strictly true, but close enough) +`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up. + +The reason that this improves performance is that `M` is usually much larger +than `I`. If `M == 10I`, then `T ≅ 3N`. + +Finally, instead of using a fixed multiple of 8, we gradually increase it as the +heap grows. This avoids wasting work for small heaps and during startup. + Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b5140057a69d36..baf8e95dffdfce 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1161,27 +1161,19 @@ def make_ll(depth): return head head = make_ll(1000) - count = 1000 - - # There will be some objects we aren't counting, - # e.g. the gc stats dicts. This test checks - # that the counts don't grow, so we try to - # correct for the uncounted objects - # This is just an estimate. - CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - for i in range(20_000): + iterations = max(20_000, initial_heap_size) + for i in range(iterations): newhead = make_ll(20) - count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 1c9f86438dadc3..de518b8dc5024b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7064,9 +7064,7 @@ int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyTypeObject *tp = Py_TYPE(obj); - if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { - return 0; - } + assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); if (values->valid) { diff --git a/Objects/genobject.c b/Objects/genobject.c index e87f199c2504ba..33679afecb420f 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -882,25 +882,7 @@ PyTypeObject PyGen_Type = { gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static PyObject * @@ -1242,24 +1224,7 @@ PyTypeObject PyCoro_Type = { coro_methods, /* tp_methods */ coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static void @@ -1464,7 +1429,6 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) - static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1673,24 +1637,7 @@ PyTypeObject PyAsyncGen_Type = { async_gen_methods, /* tp_methods */ async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; @@ -1935,16 +1882,6 @@ PyTypeObject _PyAsyncGenASend_Type = { PyObject_SelfIter, /* tp_iter */ async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ .tp_finalize = async_gen_asend_finalize, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2068d6aa9be52b..cc95b9857e3f2d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2355,6 +2355,16 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg) return 0; } + +static int +plain_object_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyTypeObject *type = Py_TYPE(self); + assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); + Py_VISIT(type); + return PyObject_VisitManagedDict(self, visit, arg); +} + static void clear_slots(PyTypeObject *type, PyObject *self) { @@ -4147,6 +4157,9 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; + if (type->tp_basicsize == sizeof(PyObject)) { + type->tp_traverse = plain_object_traverse; + } } type->tp_basicsize = slotoffset; diff --git a/Python/gc.c b/Python/gc.c index 5b9588c8741b97..fd29a48518e71b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1277,18 +1277,13 @@ gc_list_set_space(PyGC_Head *list, int space) * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of - * objects, S (for survivors) to the old space, and - * incremental collectors scan I objects from the old space. - * I > S must be true. We also want I > S * N to be where - * N > 1. Higher values of N mean that the old space is + * new objects (N) to the heap, and incremental collectors + * scan I objects from the old space. + * I > N must be true. We also want I > N * K to be where + * K > 2. Higher values of K mean that the old space is * scanned more rapidly. - * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) */ - -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ -#define SCAN_RATE_DIVISOR 10 +#define SCAN_RATE_DIVISOR 5 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1330,69 +1325,76 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -#ifndef NDEBUG -static inline int -IS_IN_VISITED(PyGC_Head *gc, int visited_space) +typedef struct work_stack { + PyGC_Head *top; + int visited_space; +} WorkStack; + +/* Remove gc from the list it is currently in and push it to the stack */ +static inline void +push_to_stack(PyGC_Head *gc, WorkStack *stack) { - assert(visited_space == 0 || other_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; + PyGC_Head *prev = GC_PREV(gc); + PyGC_Head *next = GC_NEXT(gc); + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + _PyGCHead_SET_PREV(gc, stack->top); + stack->top = gc; } -#endif -struct container_and_flag { - PyGC_Head *container; - int visited_space; - intptr_t size; -}; +static inline PyGC_Head * +pop_from_stack(WorkStack *stack) +{ + PyGC_Head *gc = stack->top; + stack->top = _PyGCHead_PREV(gc); + return gc; +} -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) +/* append list `from` to `stack`; `from` becomes an empty list */ +static void +move_list_to_stack(PyGC_Head *from, WorkStack *stack) { - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + if (!gc_list_is_empty(from)) { + PyGC_Head *from_head = GC_NEXT(from); + PyGC_Head *from_tail = GC_PREV(from); + _PyGCHead_SET_PREV(from_head, stack->top); + stack->top = from_tail; + gc_list_init(from); + } +} + +static inline void +move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +{ + assert(op != NULL); + if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited) { + gc_old_space(gc) != visited_space) { + assert(!_Py_IsImmortal(op)); gc_flip_old_space(gc); - gc_list_move(gc, cf->container); - cf->size++; + push_to_stack(gc, stack); } } - return 0; } -static intptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - .size = 0 - }; - assert(GC_NEXT(gc) == container); - while (gc != container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(IS_IN_VISITED(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; - } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); - } - return arg.size; +static void +move_unvisited(PyObject *op, WorkStack *stack, int visited_space) +{ + move_to_stack(op, stack, visited_space); +} + +#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) + +/* A traversal callback for adding to container */ +static int +visit_add_to_container(PyObject *op, void *arg) +{ + OBJECT_STAT_INC(object_visits); + WorkStack *stack = (WorkStack *)arg; + assert(stack->visited_space == get_gc_state()->visited_space); + move_to_stack(op, stack, stack->visited_space); + return 0; } /* Do bookkeeping for a completed GC cycle */ @@ -1420,54 +1422,62 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } assert(gc_list_is_empty(&gcstate->old[visited].head)); - gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static intptr_t -move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) -{ - if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, reachable); - return 1; +static void +frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) +{ + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + if (frame->f_locals != NULL) { + move_unvisited(frame->f_locals, stack, visited_space); + } + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + move_unvisited(func, stack, visited_space); + while (sp > locals) { + sp--; + _PyStackRef ref = *sp; + if (!PyStackRef_IsNull(ref)) { + PyObject *op = PyStackRef_AsPyObjectBorrow(ref); + if (!_Py_IsImmortal(op)) { + move_unvisited(op, stack, visited_space); + } } } - return 0; } -static intptr_t -mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +static Py_ssize_t +move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - struct container_and_flag arg = { - .container = reachable, - .visited_space = visited_space, - .size = 0 - }; - while (!gc_list_is_empty(reachable)) { - PyGC_Head *gc = _PyGCHead_NEXT(reachable); + Py_ssize_t objects_marked = 0; + while (stack->top != NULL) { + PyGC_Head *gc = pop_from_stack(stack); assert(gc_old_space(gc) == visited_space); - gc_list_move(gc, visited); + gc_list_append(gc, visited); + objects_marked++; PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); + assert(PyObject_IS_GC(op)); + assert(_PyObject_GC_IS_TRACKED(op)); + if (_Py_IsImmortal(op)) { + _PyObject_GC_UNTRACK(op); + } + else { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, visit_add_to_container, stack); + } } gc_list_validate_space(visited, visited_space); - return arg.size; + return objects_marked; } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1480,27 +1490,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, &reachable, visited_space); - while (sp > locals) { - sp--; - if (PyStackRef_IsNull(*sp)) { - continue; - } - PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - objects_marked++; - gc_list_move(gc, &reachable); - } - } - } + frame_move_unvisited(frame, &stack, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1513,31 +1503,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; - objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); - objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); - objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; + MOVE_UNVISITED(interp->sysdict, &stack, visited_space); + MOVE_UNVISITED(interp->builtins, &stack, visited_space); + MOVE_UNVISITED(interp->dict, &stack, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } @@ -1549,39 +1539,35 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); - gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; validate_spaces(gcstate); return objects_marked; } + +/* See InternalDocs/garbage_collector.md for more details. */ +#define MAX_HEAP_PORTION_MULTIPLIER 5 +#define MARKING_PROGRESS_MULTIPLIER 2 + static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on three things. + /* The amount of work we want to do depends on two things. * 1. The number of new objects created - * 2. The growth in heap size since the last collection - * 3. The heap size (up to the number of new objects, to avoid quadratic effects) - * - * For a steady state heap, the amount of work to do is three times the number - * of new objects added to the heap. This ensures that we stay ahead in the - * worst case of all new objects being garbage. - * - * This could be improved by tracking survival rates, but it is still a - * large improvement on the non-marking approach. + * 2. The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) */ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*3/2; - intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_fraction > max_heap_fraction) { - heap_fraction = max_heap_fraction; + intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; + intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_portion > max_heap_portion) { + heap_portion = max_heap_portion; } gcstate->young.count = 0; - return new_objects + heap_fraction; + return new_objects + heap_portion; } static void @@ -1594,36 +1580,37 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); - int scale_factor = gcstate->old[0].threshold; - if (scale_factor < 2) { - scale_factor = 2; - } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - gc_list_merge(&gcstate->young.head, &increment); + PyGC_Head increment; + gc_list_init(&increment); + WorkStack working; + working.top = 0; + working.visited_space = gcstate->visited_space; + move_list_to_stack(&gcstate->young.head, &working); + Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = gc_list_size(&increment); + assert(working.top == NULL); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - increment_size++; - assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); + push_to_stack(gc, &working); + assert(!_Py_IsImmortal(FROM_GC(gc))); + increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + assert(working.top == NULL); } + assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1632,7 +1619,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1668,6 +1654,7 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_scavenge(gcstate); + gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From 77a61c0465c27c1c4ba7cddf4638d9ed75259671 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Fri, 6 Dec 2024 23:09:20 +0900 Subject: [PATCH 03/55] gh-101100: amend references starting with `!~` in gh-127054 (#127684) --- Doc/tutorial/datastructures.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 263b0c2e2815a1..cbe780e075baf5 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -142,8 +142,8 @@ Using Lists as Stacks The list methods make it very easy to use a list as a stack, where the last element added is the first element retrieved ("last-in, first-out"). To add an -item to the top of the stack, use :meth:`!~list.append`. To retrieve an item from the -top of the stack, use :meth:`!~list.pop` without an explicit index. For example:: +item to the top of the stack, use :meth:`!append`. To retrieve an item from the +top of the stack, use :meth:`!pop` without an explicit index. For example:: >>> stack = [3, 4, 5] >>> stack.append(6) @@ -340,7 +340,7 @@ The :keyword:`!del` statement ============================= There is a way to remove an item from a list given its index instead of its -value: the :keyword:`del` statement. This differs from the :meth:`!~list.pop` method +value: the :keyword:`del` statement. This differs from the :meth:`!pop` method which returns a value. The :keyword:`!del` statement can also be used to remove slices from a list or clear the entire list (which we did earlier by assignment of an empty list to the slice). For example:: @@ -500,8 +500,8 @@ any immutable type; strings and numbers can always be keys. Tuples can be used as keys if they contain only strings, numbers, or tuples; if a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. You can't use lists as keys, since lists can be modified in place using index -assignments, slice assignments, or methods like :meth:`!~list.append` and -:meth:`!~list.extend`. +assignments, slice assignments, or methods like :meth:`!append` and +:meth:`!extend`. It is best to think of a dictionary as a set of *key: value* pairs, with the requirement that the keys are unique (within one dictionary). A pair of From 36c6178d372b075e9c74b786cfb5e47702976b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:31:30 +0100 Subject: [PATCH 04/55] gh-126024: fix UBSan failure in `unicodeobject.c:find_first_nonascii` (GH-127566) --- Objects/unicodeobject.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 463da06445984b..33c4747bbef488 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5083,12 +5083,9 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); #if PY_LITTLE_ENDIAN && HAVE_CTZ if (p < p2) { -#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) - // x86 and amd64 are little endian and can load unaligned memory. - size_t u = *(const size_t*)p & ASCII_CHAR_MASK; -#else - size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK; -#endif + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; if (u) { return (ctz(u) - 7) / 8; } From a353455fca1b8f468ff3ffbb4b5e316510b4fd43 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 6 Dec 2024 15:48:24 +0000 Subject: [PATCH 05/55] gh-125610: Fix `STORE_ATTR_INSTANCE_VALUE` specialization check (GH-125612) The `STORE_ATTR_INSTANCE_VALUE` opcode doesn't support objects with non-NULL managed dictionaries, so don't specialize to that op in that case. --- Python/specialize.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index ec2cd7025e5054..d3fea717243847 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -947,7 +947,10 @@ specialize_dict_access( return 0; } _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); - if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && _PyObject_InlineValues(owner)->valid) { + if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && + _PyObject_InlineValues(owner)->valid && + !(base_op == STORE_ATTR && _PyObject_GetManagedDict(owner) != NULL)) + { PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; assert(PyUnicode_CheckExact(name)); Py_ssize_t index = _PyDictKeys_StringLookup(keys, name); From 12680ec5bd45c85b6daebe0739d30ef45f089efa Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Fri, 6 Dec 2024 10:58:19 -0500 Subject: [PATCH 06/55] gh-127314: Don't mention the GIL when calling without a thread state on the free-threaded build (#127315) Co-authored-by: Victor Stinner --- Include/internal/pycore_pystate.h | 8 ++++++++ Lib/test/test_capi/test_mem.py | 9 +++++++-- Lib/test/test_capi/test_misc.py | 17 ++++++++++++----- ...24-11-26-22-06-10.gh-issue-127314.SsRrIu.rst | 2 ++ Objects/obmalloc.c | 7 +++++++ 5 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 54d8803bc0bdb6..1e73e541ef8de0 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -190,10 +190,18 @@ static inline void _Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate) { if (tstate == NULL) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "the function must be called with the GIL held, " "after Python initialization and before Python finalization, " "but the GIL is released (the current Python thread state is NULL)"); +#else + _Py_FatalErrorFunc(func, + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. " + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } diff --git a/Lib/test/test_capi/test_mem.py b/Lib/test/test_capi/test_mem.py index 6ab7b685c2e18b..5035b2b4829bf6 100644 --- a/Lib/test/test_capi/test_mem.py +++ b/Lib/test/test_capi/test_mem.py @@ -68,8 +68,13 @@ def test_api_misuse(self): def check_malloc_without_gil(self, code): out = self.check(code) - expected = ('Fatal Python error: _PyMem_DebugMalloc: ' - 'Python memory allocator called without holding the GIL') + if not support.Py_GIL_DISABLED: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without holding the GIL') + else: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without an active thread state. ' + 'Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?') self.assertIn(expected, out) def test_pymem_malloc_without_gil(self): diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 8e0271919cc8a5..61512e610f46f2 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -100,11 +100,18 @@ def test_no_FatalError_infinite_loop(self): _rc, out, err = run_result self.assertEqual(out, b'') # This used to cause an infinite loop. - msg = ("Fatal Python error: PyThreadState_Get: " - "the function must be called with the GIL held, " - "after Python initialization and before Python finalization, " - "but the GIL is released " - "(the current Python thread state is NULL)").encode() + if not support.Py_GIL_DISABLED: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with the GIL held, " + "after Python initialization and before Python finalization, " + "but the GIL is released " + "(the current Python thread state is NULL)").encode() + else: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. " + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?").encode() self.assertTrue(err.rstrip().startswith(msg), err) diff --git a/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst new file mode 100644 index 00000000000000..8ea3c4ee2a2c53 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst @@ -0,0 +1,2 @@ +Improve error message when calling the C API without an active thread state +on the :term:`free-threaded ` build. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2cc0377f68f990..b103deb01ca712 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2910,9 +2910,16 @@ static inline void _PyMem_DebugCheckGIL(const char *func) { if (!PyGILState_Check()) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "Python memory allocator called " "without holding the GIL"); +#else + _Py_FatalErrorFunc(func, + "Python memory allocator called " + "without an active thread state. " + "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } From 67b18a18b66b89e253f38895057ef9f6bae92e7b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 6 Dec 2024 17:27:12 +0100 Subject: [PATCH 07/55] gh-59705: Add _thread.set_name() function (#127338) On Linux, threading.Thread now sets the thread name to the operating system. * configure now checks if pthread_getname_np() and pthread_setname_np() functions are available. * Add PYTHREAD_NAME_MAXLEN macro. * Add _thread._NAME_MAXLEN constant for test_threading. Co-authored-by: Serhiy Storchaka --- Lib/test/test_threading.py | 60 ++++++++++ Lib/threading.py | 9 ++ ...4-11-27-17-04-38.gh-issue-59705.sAGyvs.rst | 2 + Modules/_threadmodule.c | 108 ++++++++++++++++++ Modules/clinic/_threadmodule.c.h | 104 +++++++++++++++++ configure | 30 +++++ configure.ac | 22 +++- pyconfig.h.in | 9 ++ 8 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst create mode 100644 Modules/clinic/_threadmodule.c.h diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index fe225558fc4f0b..d05161f46f1034 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2104,6 +2104,66 @@ def test__all__(self): support.check__all__(self, threading, ('threading', '_thread'), extra=extra, not_exported=not_exported) + @unittest.skipUnless(hasattr(_thread, 'set_name'), "missing _thread.set_name") + @unittest.skipUnless(hasattr(_thread, '_get_name'), "missing _thread._get_name") + def test_set_name(self): + # set_name() limit in bytes + truncate = getattr(_thread, "_NAME_MAXLEN", None) + limit = truncate or 100 + + tests = [ + # test short ASCII name + "CustomName", + + # test short non-ASCII name + "namé€", + + # embedded null character: name is truncated + # at the first null character + "embed\0null", + + # Test long ASCII names (not truncated) + "x" * limit, + + # Test long ASCII names (truncated) + "x" * (limit + 10), + + # Test long non-ASCII name (truncated) + "x" * (limit - 1) + "é€", + ] + if os_helper.FS_NONASCII: + tests.append(f"nonascii:{os_helper.FS_NONASCII}") + if os_helper.TESTFN_UNENCODABLE: + tests.append(os_helper.TESTFN_UNENCODABLE) + + if sys.platform.startswith("solaris"): + encoding = "utf-8" + else: + encoding = sys.getfilesystemencoding() + + def work(): + nonlocal work_name + work_name = _thread._get_name() + + for name in tests: + encoded = name.encode(encoding, "replace") + if b'\0' in encoded: + encoded = encoded.split(b'\0', 1)[0] + if truncate is not None: + encoded = encoded[:truncate] + if sys.platform.startswith("solaris"): + expected = encoded.decode("utf-8", "surrogateescape") + else: + expected = os.fsdecode(encoded) + + with self.subTest(name=name, expected=expected): + work_name = None + thread = threading.Thread(target=work, name=name) + thread.start() + thread.join() + self.assertEqual(work_name, expected, + f"{len(work_name)=} and {len(expected)=}") + class InterruptMainTests(unittest.TestCase): def check_interrupt_main_with_signal_handler(self, signum): diff --git a/Lib/threading.py b/Lib/threading.py index 94ea2f08178369..3abd22a2aa1b72 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -48,6 +48,10 @@ __all__.append('get_native_id') except AttributeError: _HAVE_THREAD_NATIVE_ID = False +try: + _set_name = _thread.set_name +except AttributeError: + _set_name = None ThreadError = _thread.error try: _CRLock = _thread.RLock @@ -1027,6 +1031,11 @@ def _bootstrap_inner(self): self._set_ident() if _HAVE_THREAD_NATIVE_ID: self._set_native_id() + if _set_name is not None and self._name: + try: + _set_name(self._name) + except OSError: + pass self._started.set() with _active_limbo_lock: _active[self._ident] = self diff --git a/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst new file mode 100644 index 00000000000000..a8c7b3d00755e6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst @@ -0,0 +1,2 @@ +On Linux, :class:`threading.Thread` now sets the thread name to the +operating system. Patch by Victor Stinner. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 4a45445e2f62db..35c032fbeaa94f 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -17,6 +17,8 @@ # include // SIGINT #endif +#include "clinic/_threadmodule.c.h" + // ThreadError is just an alias to PyExc_RuntimeError #define ThreadError PyExc_RuntimeError @@ -44,6 +46,13 @@ get_thread_state(PyObject *module) return (thread_module_state *)state; } + +/*[clinic input] +module _thread +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=be8dbe5cc4b16df7]*/ + + // _ThreadHandle type // Handles state transitions according to the following diagram: @@ -2354,6 +2363,96 @@ PyDoc_STRVAR(thread__get_main_thread_ident_doc, Internal only. Return a non-zero integer that uniquely identifies the main thread\n\ of the main interpreter."); + +#ifdef HAVE_PTHREAD_GETNAME_NP +/*[clinic input] +_thread._get_name + +Get the name of the current thread. +[clinic start generated code]*/ + +static PyObject * +_thread__get_name_impl(PyObject *module) +/*[clinic end generated code: output=20026e7ee3da3dd7 input=35cec676833d04c8]*/ +{ + // Linux and macOS are limited to respectively 16 and 64 bytes + char name[100]; + pthread_t thread = pthread_self(); + int rc = pthread_getname_np(thread, name, Py_ARRAY_LENGTH(name)); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + +#ifdef __sun + return PyUnicode_DecodeUTF8(name, strlen(name), "surrogateescape"); +#else + return PyUnicode_DecodeFSDefault(name); +#endif +} +#endif // HAVE_PTHREAD_GETNAME_NP + + +#ifdef HAVE_PTHREAD_SETNAME_NP +/*[clinic input] +_thread.set_name + + name as name_obj: unicode + +Set the name of the current thread. +[clinic start generated code]*/ + +static PyObject * +_thread_set_name_impl(PyObject *module, PyObject *name_obj) +/*[clinic end generated code: output=402b0c68e0c0daed input=7e7acd98261be82f]*/ +{ +#ifdef __sun + // Solaris always uses UTF-8 + const char *encoding = "utf-8"; +#else + // Encode the thread name to the filesystem encoding using the "replace" + // error handler + PyInterpreterState *interp = _PyInterpreterState_GET(); + const char *encoding = interp->unicode.fs_codec.encoding; +#endif + PyObject *name_encoded; + name_encoded = PyUnicode_AsEncodedString(name_obj, encoding, "replace"); + if (name_encoded == NULL) { + return NULL; + } + +#ifdef PYTHREAD_NAME_MAXLEN + // Truncate to PYTHREAD_NAME_MAXLEN bytes + the NUL byte if needed + size_t len = PyBytes_GET_SIZE(name_encoded); + if (len > PYTHREAD_NAME_MAXLEN) { + PyObject *truncated; + truncated = PyBytes_FromStringAndSize(PyBytes_AS_STRING(name_encoded), + PYTHREAD_NAME_MAXLEN); + if (truncated == NULL) { + Py_DECREF(name_encoded); + return NULL; + } + Py_SETREF(name_encoded, truncated); + } +#endif + + const char *name = PyBytes_AS_STRING(name_encoded); +#ifdef __APPLE__ + int rc = pthread_setname_np(name); +#else + pthread_t thread = pthread_self(); + int rc = pthread_setname_np(thread, name); +#endif + Py_DECREF(name_encoded); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + Py_RETURN_NONE; +} +#endif // HAVE_PTHREAD_SETNAME_NP + + static PyMethodDef thread_methods[] = { {"start_new_thread", (PyCFunction)thread_PyThread_start_new_thread, METH_VARARGS, start_new_thread_doc}, @@ -2393,6 +2492,8 @@ static PyMethodDef thread_methods[] = { METH_O, thread__make_thread_handle_doc}, {"_get_main_thread_ident", thread__get_main_thread_ident, METH_NOARGS, thread__get_main_thread_ident_doc}, + _THREAD_SET_NAME_METHODDEF + _THREAD__GET_NAME_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2484,6 +2585,13 @@ thread_module_exec(PyObject *module) llist_init(&state->shutdown_handles); +#ifdef PYTHREAD_NAME_MAXLEN + if (PyModule_AddIntConstant(module, "_NAME_MAXLEN", + PYTHREAD_NAME_MAXLEN) < 0) { + return -1; + } +#endif + return 0; } diff --git a/Modules/clinic/_threadmodule.c.h b/Modules/clinic/_threadmodule.c.h new file mode 100644 index 00000000000000..8f0507d40285b3 --- /dev/null +++ b/Modules/clinic/_threadmodule.c.h @@ -0,0 +1,104 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +#if defined(HAVE_PTHREAD_GETNAME_NP) + +PyDoc_STRVAR(_thread__get_name__doc__, +"_get_name($module, /)\n" +"--\n" +"\n" +"Get the name of the current thread."); + +#define _THREAD__GET_NAME_METHODDEF \ + {"_get_name", (PyCFunction)_thread__get_name, METH_NOARGS, _thread__get_name__doc__}, + +static PyObject * +_thread__get_name_impl(PyObject *module); + +static PyObject * +_thread__get_name(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return _thread__get_name_impl(module); +} + +#endif /* defined(HAVE_PTHREAD_GETNAME_NP) */ + +#if defined(HAVE_PTHREAD_SETNAME_NP) + +PyDoc_STRVAR(_thread_set_name__doc__, +"set_name($module, /, name)\n" +"--\n" +"\n" +"Set the name of the current thread."); + +#define _THREAD_SET_NAME_METHODDEF \ + {"set_name", _PyCFunction_CAST(_thread_set_name), METH_FASTCALL|METH_KEYWORDS, _thread_set_name__doc__}, + +static PyObject * +_thread_set_name_impl(PyObject *module, PyObject *name_obj); + +static PyObject * +_thread_set_name(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_name", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *name_obj; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("set_name", "argument 'name'", "str", args[0]); + goto exit; + } + name_obj = args[0]; + return_value = _thread_set_name_impl(module, name_obj); + +exit: + return return_value; +} + +#endif /* defined(HAVE_PTHREAD_SETNAME_NP) */ + +#ifndef _THREAD__GET_NAME_METHODDEF + #define _THREAD__GET_NAME_METHODDEF +#endif /* !defined(_THREAD__GET_NAME_METHODDEF) */ + +#ifndef _THREAD_SET_NAME_METHODDEF + #define _THREAD_SET_NAME_METHODDEF +#endif /* !defined(_THREAD_SET_NAME_METHODDEF) */ +/*[clinic end generated code: output=b5cb85aaccc45bf6 input=a9049054013a1b77]*/ diff --git a/configure b/configure index 5e9bcb602d884e..bcbab8dfcff190 100755 --- a/configure +++ b/configure @@ -821,6 +821,7 @@ MODULE_TIME_TRUE MODULE__IO_FALSE MODULE__IO_TRUE MODULE_BUILDTYPE +PYTHREAD_NAME_MAXLEN TEST_MODULES OPENSSL_LDFLAGS OPENSSL_LIBS @@ -18841,6 +18842,18 @@ if test "x$ac_cv_func_pthread_kill" = xyes then : printf "%s\n" "#define HAVE_PTHREAD_KILL 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "pthread_getname_np" "ac_cv_func_pthread_getname_np" +if test "x$ac_cv_func_pthread_getname_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_GETNAME_NP 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "pthread_setname_np" "ac_cv_func_pthread_setname_np" +if test "x$ac_cv_func_pthread_setname_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_SETNAME_NP 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "ptsname" "ac_cv_func_ptsname" if test "x$ac_cv_func_ptsname" = xyes @@ -29081,6 +29094,23 @@ fi CPPFLAGS=$save_CPPFLAGS +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + +printf "%s\n" "#define PYTHREAD_NAME_MAXLEN $PYTHREAD_NAME_MAXLEN" >>confdefs.h + +fi + + + # stdlib diff --git a/configure.ac b/configure.ac index bf3685e1b1b209..922a125ea9608e 100644 --- a/configure.ac +++ b/configure.ac @@ -5110,8 +5110,10 @@ AC_CHECK_FUNCS([ \ mknod mknodat mktime mmap mremap nice openat opendir pathconf pause pipe \ pipe2 plock poll posix_fadvise posix_fallocate posix_openpt posix_spawn posix_spawnp \ posix_spawn_file_actions_addclosefrom_np \ - pread preadv preadv2 process_vm_readv pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ - pthread_kill ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ + pread preadv preadv2 process_vm_readv \ + pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ + pthread_kill pthread_getname_np pthread_setname_np \ + ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ sched_setparam sched_setscheduler sem_clockwait sem_getvalue sem_open \ sem_timedwait sem_unlink sendfile setegid seteuid setgid sethostname \ @@ -7498,6 +7500,22 @@ AS_VAR_IF([ac_cv_libatomic_needed], [yes], _RESTORE_VAR([CPPFLAGS]) +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + AC_DEFINE_UNQUOTED([PYTHREAD_NAME_MAXLEN], [$PYTHREAD_NAME_MAXLEN], + [Maximum length in bytes of a thread name]) +fi +AC_SUBST([PYTHREAD_NAME_MAXLEN]) + + # stdlib AC_DEFUN([PY_STDLIB_MOD_SET_NA], [ m4_foreach([mod], [$@], [ diff --git a/pyconfig.h.in b/pyconfig.h.in index 6a1f1284650b9f..166c195a8c66fc 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -981,6 +981,9 @@ /* Define to 1 if you have the `pthread_getcpuclockid' function. */ #undef HAVE_PTHREAD_GETCPUCLOCKID +/* Define to 1 if you have the `pthread_getname_np' function. */ +#undef HAVE_PTHREAD_GETNAME_NP + /* Define to 1 if you have the header file. */ #undef HAVE_PTHREAD_H @@ -990,6 +993,9 @@ /* Define to 1 if you have the `pthread_kill' function. */ #undef HAVE_PTHREAD_KILL +/* Define to 1 if you have the `pthread_setname_np' function. */ +#undef HAVE_PTHREAD_SETNAME_NP + /* Define to 1 if you have the `pthread_sigmask' function. */ #undef HAVE_PTHREAD_SIGMASK @@ -1650,6 +1656,9 @@ /* Define as the preferred size in bits of long digits */ #undef PYLONG_BITS_IN_DIGIT +/* Maximum length in bytes of a thread name */ +#undef PYTHREAD_NAME_MAXLEN + /* enabled builtin hash modules */ #undef PY_BUILTIN_HASHLIB_HASHES From 89fa7ec74e531870a8f495d5e32ec0b00dbcd32b Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:36:06 +0000 Subject: [PATCH 08/55] gh-119786: Add jit.md. Move adaptive.md to a section of interpreter.md. (#127175) --- InternalDocs/README.md | 4 +- InternalDocs/adaptive.md | 146 ------------------------ InternalDocs/code_objects.md | 5 + InternalDocs/compiler.md | 10 -- InternalDocs/interpreter.md | 210 ++++++++++++++++++++++++++++++----- InternalDocs/jit.md | 134 ++++++++++++++++++++++ 6 files changed, 322 insertions(+), 187 deletions(-) delete mode 100644 InternalDocs/adaptive.md create mode 100644 InternalDocs/jit.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 8cdd06d189f362..794b4f3c6aad42 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -34,9 +34,9 @@ Runtime Objects Program Execution --- -- [The Interpreter](interpreter.md) +- [The Bytecode Interpreter](interpreter.md) -- [Adaptive Instruction Families](adaptive.md) +- [The JIT](jit.md) - [Garbage Collector Design](garbage_collector.md) diff --git a/InternalDocs/adaptive.md b/InternalDocs/adaptive.md deleted file mode 100644 index 7cfa8e52310460..00000000000000 --- a/InternalDocs/adaptive.md +++ /dev/null @@ -1,146 +0,0 @@ -# Adding or extending a family of adaptive instructions. - -## Families of instructions - -The core part of [PEP 659](https://peps.python.org/pep-0659/) -(specializing adaptive interpreter) is the families of -instructions that perform the adaptive specialization. - -A family of instructions has the following fundamental properties: - -* It corresponds to a single instruction in the code - generated by the bytecode compiler. -* It has a single adaptive instruction that records an execution count and, - at regular intervals, attempts to specialize itself. If not specializing, - it executes the base implementation. -* It has at least one specialized form of the instruction that is tailored - for a particular value or set of values at runtime. -* All members of the family must have the same number of inline cache entries, - to ensure correct execution. - Individual family members do not need to use all of the entries, - but must skip over any unused entries when executing. - -The current implementation also requires the following, -although these are not fundamental and may change: - -* All families use one or more inline cache entries, - the first entry is always the counter. -* All instruction names should start with the name of the adaptive - instruction. -* Specialized forms should have names describing their specialization. - -## Example family - -The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) -already has an adaptive family that serves as a relatively simple example. - -The `LOAD_GLOBAL` instruction performs adaptive specialization, -calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. - -There are two specialized instructions in the family, `LOAD_GLOBAL_MODULE` -which is specialized for global variables in the module, and -`LOAD_GLOBAL_BUILTIN` which is specialized for builtin variables. - -## Performance analysis - -The benefit of a specialization can be assessed with the following formula: -`Tbase/Tadaptive`. - -Where `Tbase` is the mean time to execute the base instruction, -and `Tadaptive` is the mean time to execute the specialized and adaptive forms. - -`Tadaptive = (sum(Ti*Ni) + Tmiss*Nmiss)/(sum(Ni)+Nmiss)` - -`Ti` is the time to execute the `i`th instruction in the family and `Ni` is -the number of times that instruction is executed. -`Tmiss` is the time to process a miss, including de-optimzation -and the time to execute the base instruction. - -The ideal situation is where misses are rare and the specialized -forms are much faster than the base instruction. -`LOAD_GLOBAL` is near ideal, `Nmiss/sum(Ni) ≈ 0`. -In which case we have `Tadaptive ≈ sum(Ti*Ni)`. -Since we can expect the specialized forms `LOAD_GLOBAL_MODULE` and -`LOAD_GLOBAL_BUILTIN` to be much faster than the adaptive base instruction, -we would expect the specialization of `LOAD_GLOBAL` to be profitable. - -## Design considerations - -While `LOAD_GLOBAL` may be ideal, instructions like `LOAD_ATTR` and -`CALL_FUNCTION` are not. For maximum performance we want to keep `Ti` -low for all specialized instructions and `Nmiss` as low as possible. - -Keeping `Nmiss` low means that there should be specializations for almost -all values seen by the base instruction. Keeping `sum(Ti*Ni)` low means -keeping `Ti` low which means minimizing branches and dependent memory -accesses (pointer chasing). These two objectives may be in conflict, -requiring judgement and experimentation to design the family of instructions. - -The size of the inline cache should as small as possible, -without impairing performance, to reduce the number of -`EXTENDED_ARG` jumps, and to reduce pressure on the CPU's data cache. - -### Gathering data - -Before choosing how to specialize an instruction, it is important to gather -some data. What are the patterns of usage of the base instruction? -Data can best be gathered by instrumenting the interpreter. Since a -specialization function and adaptive instruction are going to be required, -instrumentation can most easily be added in the specialization function. - -### Choice of specializations - -The performance of the specializing adaptive interpreter relies on the -quality of specialization and keeping the overhead of specialization low. - -Specialized instructions must be fast. In order to be fast, -specialized instructions should be tailored for a particular -set of values that allows them to: - -1. Verify that incoming value is part of that set with low overhead. -2. Perform the operation quickly. - -This requires that the set of values is chosen such that membership can be -tested quickly and that membership is sufficient to allow the operation to -performed quickly. - -For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` -dictionaries that have a keys with the expected version. - -This can be tested quickly: - -* `globals->keys->dk_version == expected_version` - -and the operation can be performed quickly: - -* `value = entries[cache->index].me_value;`. - -Because it is impossible to measure the performance of an instruction without -also measuring unrelated factors, the assessment of the quality of a -specialization will require some judgement. - -As a general rule, specialized instructions should be much faster than the -base instruction. - -### Implementation of specialized instructions - -In general, specialized instructions should be implemented in two parts: - -1. A sequence of guards, each of the form - `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. -2. The operation, which should ideally have no branches and - a minimum number of dependent memory accesses. - -In practice, the parts may overlap, as data required for guards -can be re-used in the operation. - -If there are branches in the operation, then consider further specialization -to eliminate the branches. - -### Maintaining stats - -Finally, take care that stats are gather correctly. -After the last `DEOPT_IF` has passed, a hit should be recorded with -`STAT_INC(BASE_INSTRUCTION, hit)`. -After an optimization has been deferred in the adaptive instruction, -that should be recorded with `STAT_INC(BASE_INSTRUCTION, deferred)`. diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md index d4e28c6b238b48..a91a7043c1b8d4 100644 --- a/InternalDocs/code_objects.md +++ b/InternalDocs/code_objects.md @@ -18,6 +18,11 @@ Code objects are typically produced by the bytecode [compiler](compiler.md), although they are often written to disk by one process and read back in by another. The disk version of a code object is serialized using the [marshal](https://docs.python.org/dev/library/marshal.html) protocol. +When a `CodeObject` is created, the function `_PyCode_Quicken()` from +[`Python/specialize.c`](../Python/specialize.c) is called to initialize +the caches of all adaptive instructions. This is required because the +on-disk format is a sequence of bytes, and some of the caches need to be +initialized with 16-bit values. Code objects are nominally immutable. Some fields (including `co_code_adaptive` and fields for runtime diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 9e99f348acbd8f..c257bfd9faf78f 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -595,16 +595,6 @@ Objects * [Exception Handling](exception_handling.md): Describes the exception table -Specializing Adaptive Interpreter -================================= - -Adding a specializing, adaptive interpreter to CPython will bring significant -performance improvements. These documents provide more information: - -* [PEP 659: Specializing Adaptive Interpreter](https://peps.python.org/pep-0659/). -* [Adding or extending a family of adaptive instructions](adaptive.md) - - References ========== diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md index ab149e43471072..fa4a54fdc54fac 100644 --- a/InternalDocs/interpreter.md +++ b/InternalDocs/interpreter.md @@ -1,8 +1,4 @@ -The bytecode interpreter -======================== - -Overview --------- +# The bytecode interpreter This document describes the workings and implementation of the bytecode interpreter, the part of python that executes compiled Python code. Its @@ -47,8 +43,7 @@ simply calls [`_PyEval_EvalFrameDefault()`] to execute the frame. However, as pe `_PyEval_EvalFrameDefault()`. -Instruction decoding --------------------- +## Instruction decoding The first task of the interpreter is to decode the bytecode instructions. Bytecode is stored as an array of 16-bit code units (`_Py_CODEUNIT`). @@ -110,8 +105,7 @@ snippet decode a complete instruction: For various reasons we'll get to later (mostly efficiency, given that `EXTENDED_ARG` is rare) the actual code is different. -Jumps -===== +## Jumps Note that when the `switch` statement is reached, `next_instr` (the "instruction offset") already points to the next instruction. @@ -120,25 +114,26 @@ Thus, jump instructions can be implemented by manipulating `next_instr`: - A jump forward (`JUMP_FORWARD`) sets `next_instr += oparg`. - A jump backward sets `next_instr -= oparg`. -Inline cache entries -==================== +## Inline cache entries Some (specialized or specializable) instructions have an associated "inline cache". The inline cache consists of one or more two-byte entries included in the bytecode array as additional words following the `opcode`/`oparg` pair. The size of the inline cache for a particular instruction is fixed by its `opcode`. Moreover, the inline cache size for all instructions in a -[family of specialized/specializable instructions](adaptive.md) +[family of specialized/specializable instructions](#Specialization) (for example, `LOAD_ATTR`, `LOAD_ATTR_SLOT`, `LOAD_ATTR_MODULE`) must all be the same. Cache entries are reserved by the compiler and initialized with zeros. Although they are represented by code units, cache entries do not conform to the `opcode` / `oparg` format. -If an instruction has an inline cache, the layout of its cache is described by -a `struct` definition in (`pycore_code.h`)[../Include/internal/pycore_code.h]. -This allows us to access the cache by casting `next_instr` to a pointer to this `struct`. -The size of such a `struct` must be independent of the machine architecture, word size -and alignment requirements. For a 32-bit field, the `struct` should use `_Py_CODEUNIT field[2]`. +If an instruction has an inline cache, the layout of its cache is described in +the instruction's definition in [`Python/bytecodes.c`](../Python/bytecodes.c). +The structs defined in [`pycore_code.h`](../Include/internal/pycore_code.h) +allow us to access the cache by casting `next_instr` to a pointer to the relevant +`struct`. The size of such a `struct` must be independent of the machine +architecture, word size and alignment requirements. For a 32-bit field, the +`struct` should use `_Py_CODEUNIT field[2]`. The instruction implementation is responsible for advancing `next_instr` past the inline cache. For example, if an instruction's inline cache is four bytes (that is, two code units) in size, @@ -153,8 +148,7 @@ Serializing non-zero cache entries would present a problem because the serializa More information about the use of inline caches can be found in [PEP 659](https://peps.python.org/pep-0659/#ancillary-data). -The evaluation stack --------------------- +## The evaluation stack Most instructions read or write some data in the form of object references (`PyObject *`). The CPython bytecode interpreter is a stack machine, meaning that its instructions operate @@ -193,16 +187,14 @@ For example, the following sequence is illegal, because it keeps pushing items o > Do not confuse the evaluation stack with the call stack, which is used to implement calling > and returning from functions. -Error handling --------------- +## Error handling When the implementation of an opcode raises an exception, it jumps to the `exception_unwind` label in [Python/ceval.c](../Python/ceval.c). The exception is then handled as described in the [`exception handling documentation`](exception_handling.md#handling-exceptions). -Python-to-Python calls ----------------------- +## Python-to-Python calls The `_PyEval_EvalFrameDefault()` function is recursive, because sometimes the interpreter calls some C function that calls back into the interpreter. @@ -227,8 +219,7 @@ returns from `_PyEval_EvalFrameDefault()` altogether, to a C caller. A similar check is performed when an unhandled exception occurs. -The call stack --------------- +## The call stack Up through 3.10, the call stack was implemented as a singly-linked list of [frame objects](frames.md). This was expensive because each call would require a @@ -262,8 +253,7 @@ See also the [generators](generators.md) section. -Introducing a new bytecode instruction --------------------------------------- +## Introducing a new bytecode instruction It is occasionally necessary to add a new opcode in order to implement a new feature or change the way that existing features are compiled. @@ -355,6 +344,169 @@ new bytecode properly. Run `make regen-importlib` for updating the bytecode of frozen importlib files. You have to run `make` again after this to recompile the generated C files. +## Specialization + +Bytecode specialization, which was introduced in +[PEP 659](https://peps.python.org/pep-0659/), speeds up program execution by +rewriting instructions based on runtime information. This is done by replacing +a generic instruction with a faster version that works for the case that this +program encounters. Each specializable instruction is responsible for rewriting +itself, using its [inline caches](#inline-cache-entries) for +bookkeeping. + +When an adaptive instruction executes, it may attempt to specialize itself, +depending on the argument and the contents of its cache. This is done +by calling one of the `_Py_Specialize_XXX` functions in +[`Python/specialize.c`](../Python/specialize.c). + + +The specialized instructions are responsible for checking that the special-case +assumptions still apply, and de-optimizing back to the generic version if not. + +## Families of instructions + +A *family* of instructions consists of an adaptive instruction along with the +specialized instructions that it can be replaced by. +It has the following fundamental properties: + +* It corresponds to a single instruction in the code + generated by the bytecode compiler. +* It has a single adaptive instruction that records an execution count and, + at regular intervals, attempts to specialize itself. If not specializing, + it executes the base implementation. +* It has at least one specialized form of the instruction that is tailored + for a particular value or set of values at runtime. +* All members of the family must have the same number of inline cache entries, + to ensure correct execution. + Individual family members do not need to use all of the entries, + but must skip over any unused entries when executing. + +The current implementation also requires the following, +although these are not fundamental and may change: + +* All families use one or more inline cache entries, + the first entry is always the counter. +* All instruction names should start with the name of the adaptive + instruction. +* Specialized forms should have names describing their specialization. + +## Example family + +The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) +already has an adaptive family that serves as a relatively simple example. + +The `LOAD_GLOBAL` instruction performs adaptive specialization, +calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. + +There are two specialized instructions in the family, `LOAD_GLOBAL_MODULE` +which is specialized for global variables in the module, and +`LOAD_GLOBAL_BUILTIN` which is specialized for builtin variables. + +## Performance analysis + +The benefit of a specialization can be assessed with the following formula: +`Tbase/Tadaptive`. + +Where `Tbase` is the mean time to execute the base instruction, +and `Tadaptive` is the mean time to execute the specialized and adaptive forms. + +`Tadaptive = (sum(Ti*Ni) + Tmiss*Nmiss)/(sum(Ni)+Nmiss)` + +`Ti` is the time to execute the `i`th instruction in the family and `Ni` is +the number of times that instruction is executed. +`Tmiss` is the time to process a miss, including de-optimzation +and the time to execute the base instruction. + +The ideal situation is where misses are rare and the specialized +forms are much faster than the base instruction. +`LOAD_GLOBAL` is near ideal, `Nmiss/sum(Ni) ≈ 0`. +In which case we have `Tadaptive ≈ sum(Ti*Ni)`. +Since we can expect the specialized forms `LOAD_GLOBAL_MODULE` and +`LOAD_GLOBAL_BUILTIN` to be much faster than the adaptive base instruction, +we would expect the specialization of `LOAD_GLOBAL` to be profitable. + +## Design considerations + +While `LOAD_GLOBAL` may be ideal, instructions like `LOAD_ATTR` and +`CALL_FUNCTION` are not. For maximum performance we want to keep `Ti` +low for all specialized instructions and `Nmiss` as low as possible. + +Keeping `Nmiss` low means that there should be specializations for almost +all values seen by the base instruction. Keeping `sum(Ti*Ni)` low means +keeping `Ti` low which means minimizing branches and dependent memory +accesses (pointer chasing). These two objectives may be in conflict, +requiring judgement and experimentation to design the family of instructions. + +The size of the inline cache should as small as possible, +without impairing performance, to reduce the number of +`EXTENDED_ARG` jumps, and to reduce pressure on the CPU's data cache. + +### Gathering data + +Before choosing how to specialize an instruction, it is important to gather +some data. What are the patterns of usage of the base instruction? +Data can best be gathered by instrumenting the interpreter. Since a +specialization function and adaptive instruction are going to be required, +instrumentation can most easily be added in the specialization function. + +### Choice of specializations + +The performance of the specializing adaptive interpreter relies on the +quality of specialization and keeping the overhead of specialization low. + +Specialized instructions must be fast. In order to be fast, +specialized instructions should be tailored for a particular +set of values that allows them to: + +1. Verify that incoming value is part of that set with low overhead. +2. Perform the operation quickly. + +This requires that the set of values is chosen such that membership can be +tested quickly and that membership is sufficient to allow the operation to +performed quickly. + +For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` +dictionaries that have a keys with the expected version. + +This can be tested quickly: + +* `globals->keys->dk_version == expected_version` + +and the operation can be performed quickly: + +* `value = entries[cache->index].me_value;`. + +Because it is impossible to measure the performance of an instruction without +also measuring unrelated factors, the assessment of the quality of a +specialization will require some judgement. + +As a general rule, specialized instructions should be much faster than the +base instruction. + +### Implementation of specialized instructions + +In general, specialized instructions should be implemented in two parts: + +1. A sequence of guards, each of the form + `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. +2. The operation, which should ideally have no branches and + a minimum number of dependent memory accesses. + +In practice, the parts may overlap, as data required for guards +can be re-used in the operation. + +If there are branches in the operation, then consider further specialization +to eliminate the branches. + +### Maintaining stats + +Finally, take care that stats are gathered correctly. +After the last `DEOPT_IF` has passed, a hit should be recorded with +`STAT_INC(BASE_INSTRUCTION, hit)`. +After an optimization has been deferred in the adaptive instruction, +that should be recorded with `STAT_INC(BASE_INSTRUCTION, deferred)`. + + Additional resources -------------------- diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md new file mode 100644 index 00000000000000..1e9f385d5f87fa --- /dev/null +++ b/InternalDocs/jit.md @@ -0,0 +1,134 @@ +# The JIT + +The [adaptive interpreter](interpreter.md) consists of a main loop that +executes the bytecode instructions generated by the +[bytecode compiler](compiler.md) and their +[specializations](interpreter.md#Specialization). Runtime optimization in +this interpreter can only be done for one instruction at a time. The JIT +is based on a mechanism to replace an entire sequence of bytecode instructions, +and this enables optimizations that span multiple instructions. + +Historically, the adaptive interpreter was referred to as `tier 1` and +the JIT as `tier 2`. You will see remnants of this in the code. + +## The Optimizer and Executors + +The program begins running on the adaptive interpreter, until a `JUMP_BACKWARD` +instruction determines that it is "hot" because the counter in its +[inline cache](interpreter.md#inline-cache-entries) indicates that it +executed more than some threshold number of times (see +[`backoff_counter_triggers`](../Include/internal/pycore_backoff.h)). +It then calls the function `_PyOptimizer_Optimize()` in +[`Python/optimizer.c`](../Python/optimizer.c), passing it the current +[frame](frames.md) and instruction pointer. `_PyOptimizer_Optimize()` +constructs an object of type +[`_PyExecutorObject`](Include/internal/pycore_optimizer.h) which implements +an optimized version of the instruction trace beginning at this jump. + +The optimizer determines where the trace ends, and the executor is set up +to either return to the adaptive interpreter and resume execution, or +transfer control to another executor (see `_PyExitData` in +Include/internal/pycore_optimizer.h). + +The executor is stored on the [`code object`](code_objects.md) of the frame, +in the `co_executors` field which is an array of executors. The start +instruction of the trace (the `JUMP_BACKWARD`) is replaced by an +`ENTER_EXECUTOR` instruction whose `oparg` is equal to the index of the +executor in `co_executors`. + +## The micro-op optimizer + +The optimizer that `_PyOptimizer_Optimize()` runs is configurable via the +`_Py_SetTier2Optimizer()` function (this is used in test via +`_testinternalcapi.set_optimizer()`.) + +The micro-op (abbreviated `uop` to approximate `μop`) optimizer is defined in +[`Python/optimizer.c`](../Python/optimizer.c) as the type `_PyUOpOptimizer_Type`. +It translates an instruction trace into a sequence of micro-ops by replacing +each bytecode by an equivalent sequence of micro-ops (see +`_PyOpcode_macro_expansion` in +[pycore_opcode_metadata.h](../Include/internal/pycore_opcode_metadata.h) +which is generated from [`Python/bytecodes.c`](../Python/bytecodes.c)). +The micro-op sequence is then optimized by +`_Py_uop_analyze_and_optimize` in +[`Python/optimizer_analysis.c`](../Python/optimizer_analysis.c) +and an instance of `_PyUOpExecutor_Type` is created to contain it. + +## The JIT interpreter + +After a `JUMP_BACKWARD` instruction invokes the uop optimizer to create a uop +executor, it transfers control to this executor via the `GOTO_TIER_TWO` macro. + +CPython implements two executors. Here we describe the JIT interpreter, +which is the simpler of them and is therefore useful for debugging and analyzing +the uops generation and optimization stages. To run it, we configure the +JIT to run on its interpreter (i.e., python is configured with +[`--enable-experimental-jit=interpreter`](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit)). + +When invoked, the executor jumps to the `tier2_dispatch:` label in +[`Python/ceval.c`](../Python/ceval.c), where there is a loop that +executes the micro-ops. The body of this loop is a switch statement over +the uops IDs, resembling the one used in the adaptive interpreter. + +The swtich implementing the uops is in [`Python/executor_cases.c.h`](../Python/executor_cases.c.h), +which is generated by the build script +[`Tools/cases_generator/tier2_generator.py`](../Tools/cases_generator/tier2_generator.py) +from the bytecode definitions in +[`Python/bytecodes.c`](../Python/bytecodes.c). + +When an `_EXIT_TRACE` or `_DEOPT` uop is reached, the uop interpreter exits +and execution returns to the adaptive interpreter. + +## Invalidating Executors + +In addition to being stored on the code object, each executor is also +inserted into a list of all executors, which is stored in the interpreter +state's `executor_list_head` field. This list is used when it is necessary +to invalidate executors because values they used in their construction may +have changed. + +## The JIT + +When the full jit is enabled (python was configured with +[`--enable-experimental-jit`](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit), +the uop executor's `jit_code` field is populated with a pointer to a compiled +C function that implements the executor logic. This function's signature is +defined by `jit_func` in [`pycore_jit.h`](Include/internal/pycore_jit.h). +When the executor is invoked by `ENTER_EXECUTOR`, instead of jumping to +the uop interpreter at `tier2_dispatch`, the executor runs the function +that `jit_code` points to. This function returns the instruction pointer +of the next Tier 1 instruction that needs to execute. + +The generation of the jitted functions uses the copy-and-patch technique +which is described in +[Haoran Xu's article](https://sillycross.github.io/2023/05/12/2023-05-12/). +At its core are statically generated `stencils` for the implementation +of the micro ops, which are completed with runtime information while +the jitted code is constructed for an executor by +[`_PyJIT_Compile`](../Python/jit.c). + +The stencils are generated at build time under the Makefile target `regen-jit` +by the scripts in [`/Tools/jit`](/Tools/jit). This script reads +[`Python/executor_cases.c.h`](../Python/executor_cases.c.h) (which is +generated from [`Python/bytecodes.c`](../Python/bytecodes.c)). For +each opcode, it constructs a `.c` file that contains a function for +implementing this opcode, with some runtime information injected. +This is done by replacing `CASE` by the bytecode definition in the +template file [`Tools/jit/template.c`](../Tools/jit/template.c). + +Each of the `.c` files is compiled by LLVM, to produce an object file +that contains a function that executes the opcode. These compiled +functions are used to generate the file +[`jit_stencils.h`](../jit_stencils.h), which contains the functions +that the JIT can use to emit code for each of the bytecodes. + +For Python maintainers this means that changes to the bytecodes and +their implementations do not require changes related to the stencils, +because everything is automatically generated from +[`Python/bytecodes.c`](../Python/bytecodes.c) at build time. + +See Also: + +* [Copy-and-Patch Compilation: A fast compilation algorithm for high-level languages and bytecode](https://arxiv.org/abs/2011.13127) + +* [PyCon 2024: Building a JIT compiler for CPython](https://www.youtube.com/watch?v=kMO3Ju0QCDo) From e59caf67cdb8dae26470f00599ea8dbb00968a73 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Fri, 6 Dec 2024 17:50:58 +0000 Subject: [PATCH 09/55] Fix typo in `Lib/_android_support.py` (#127699) --- Lib/_android_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_android_support.py b/Lib/_android_support.py index 7572745c851847..ae506f6a4b57b8 100644 --- a/Lib/_android_support.py +++ b/Lib/_android_support.py @@ -6,7 +6,7 @@ # The maximum length of a log message in bytes, including the level marker and # tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. -# Messages longer than this will be be truncated by logcat. This limit has already +# Messages longer than this will be truncated by logcat. This limit has already # been reduced at least once in the history of Android (from 4076 to 4068 between # API level 23 and 26), so leave some headroom. MAX_BYTES_PER_WRITE = 4000 From 5b6635f772d187d6049a56bfea76855644cd4ca1 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 6 Dec 2024 18:10:00 +0000 Subject: [PATCH 10/55] GH-127381: pathlib ABCs: remove `PathBase.rename()` and `replace()` (#127658) These methods are obviated by `PathBase.move()`, which can move directories and supports any `PathBase` object as a target. --- Lib/pathlib/_abc.py | 37 +---------------------- Lib/pathlib/_local.py | 17 +++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 2 -- 3 files changed, 18 insertions(+), 38 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 86617ff2616f33..11a11ecc4c8203 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -14,7 +14,7 @@ import functools import operator import posixpath -from errno import EINVAL, EXDEV +from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from pathlib._os import copyfileobj @@ -902,45 +902,10 @@ def copy_into(self, target_dir, *, follow_symlinks=True, dirs_exist_ok=dirs_exist_ok, preserve_metadata=preserve_metadata) - def rename(self, target): - """ - Rename this path to the target path. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('rename()')) - - def replace(self, target): - """ - Rename this path to the target path, overwriting if that path exists. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('replace()')) - def move(self, target): """ Recursively move this file or directory tree to the given destination. """ - self._ensure_different_file(target) - try: - return self.replace(target) - except UnsupportedOperation: - pass - except TypeError: - if not isinstance(target, PathBase): - raise - except OSError as err: - if err.errno != EXDEV: - raise target = self.copy(target, follow_symlinks=False, preserve_metadata=True) self._delete() return target diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index bb8a252c0e94e2..250bc12956f5bc 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -4,6 +4,7 @@ import os import posixpath import sys +from errno import EXDEV from glob import _StringGlobber from itertools import chain from _collections_abc import Sequence @@ -876,6 +877,22 @@ def replace(self, target): os.replace(self, target) return self.with_segments(target) + def move(self, target): + """ + Recursively move this file or directory tree to the given destination. + """ + self._ensure_different_file(target) + try: + return self.replace(target) + except TypeError: + if not isinstance(target, PathBase): + raise + except OSError as err: + if err.errno != EXDEV: + raise + # Fall back to copy+delete. + return PathBase.move(self, target) + if hasattr(os, "symlink"): def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 7ba3fa823a30b9..00153e3f5e997e 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1376,8 +1376,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.hardlink_to, 'foo') self.assertRaises(e, p.mkdir) self.assertRaises(e, p.touch) - self.assertRaises(e, p.rename, 'foo') - self.assertRaises(e, p.replace, 'foo') self.assertRaises(e, p.chmod, 0o755) self.assertRaises(e, p.lchmod, 0o755) self.assertRaises(e, p.unlink) From 0fc4063747c96223575f6f5a0562eddf2ed0ed62 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 6 Dec 2024 10:42:05 -0800 Subject: [PATCH 11/55] GH-127652: stop using `--wasi preview2` in `wasi.py` (GH-127704) It's only to use WASI 0.2 code to back preview1 APIs and is considered experimental anyway. --- Tools/wasm/wasi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Tools/wasm/wasi.py b/Tools/wasm/wasi.py index ac36d55587a38f..da847c4ff86215 100644 --- a/Tools/wasm/wasi.py +++ b/Tools/wasm/wasi.py @@ -297,8 +297,6 @@ def main(): # build. # Use 16 MiB stack. "--wasm max-wasm-stack=16777216 " - # Use WASI 0.2 primitives. - "--wasi preview2 " # Enable thread support; causes use of preview1. #"--wasm threads=y --wasi threads=y " # Map the checkout to / to load the stdlib from /Lib. From 31c9f3ced293492b38e784c17c4befe425da5dab Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 6 Dec 2024 21:39:45 +0000 Subject: [PATCH 12/55] GH-127381: pathlib ABCs: remove `PathBase.resolve()` and `absolute()` (#127707) Remove our implementation of POSIX path resolution in `PathBase.resolve()`. This functionality is rather fragile and isn't necessary in most cases. It depends on `PathBase.stat()`, which we're looking to remove. Also remove `PathBase.absolute()`. Many legitimate virtual filesystems lack the notion of a 'current directory', so it's wrong to include in the basic interface. --- Lib/pathlib/_abc.py | 64 +- Lib/test/test_pathlib/test_pathlib.py | 586 ++++++++++++++++++- Lib/test/test_pathlib/test_pathlib_abc.py | 680 +--------------------- 3 files changed, 599 insertions(+), 731 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 11a11ecc4c8203..820970fcd5889b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -13,7 +13,6 @@ import functools import operator -import posixpath from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -115,11 +114,6 @@ class PurePathBase: # The `_raw_paths` slot stores unjoined string paths. This is set in # the `__init__()` method. '_raw_paths', - - # The '_resolving' slot stores a boolean indicating whether the path - # is being processed by `PathBase.resolve()`. This prevents duplicate - # work from occurring when `resolve()` calls `stat()` or `readlink()`. - '_resolving', ) parser = ParserBase() _globber = PathGlobber @@ -130,7 +124,6 @@ def __init__(self, *args): raise TypeError( f"argument should be a str, not {type(arg).__name__!r}") self._raw_paths = list(args) - self._resolving = False def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. @@ -339,9 +332,7 @@ def parent(self): path = str(self) parent = self.parser.split(path)[0] if path != parent: - parent = self.with_segments(parent) - parent._resolving = self._resolving - return parent + return self.with_segments(parent) return self @property @@ -424,9 +415,6 @@ class PathBase(PurePathBase): """ __slots__ = () - # Maximum number of symlinks to follow in resolve() - _max_symlinks = 40 - @classmethod def _unsupported_msg(cls, attribute): return f"{cls.__name__}.{attribute} is unsupported" @@ -720,20 +708,6 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): yield path, dirnames, filenames paths += [path.joinpath(d) for d in reversed(dirnames)] - def absolute(self): - """Return an absolute version of this path - No normalization or symlink resolution is performed. - - Use resolve() to resolve symlinks and remove '..' segments. - """ - if self.is_absolute(): - return self - elif self.parser is not posixpath: - raise UnsupportedOperation(self._unsupported_msg('absolute()')) - else: - # Treat the root directory as the current working directory. - return self.with_segments('/', *self._raw_paths) - def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) @@ -745,42 +719,6 @@ def readlink(self): Return the path to which the symbolic link points. """ raise UnsupportedOperation(self._unsupported_msg('readlink()')) - readlink._supported = False - - def resolve(self, strict=False): - """ - Make the path absolute, resolving all symlinks on the way and also - normalizing it. - """ - if self._resolving: - return self - elif self.parser is not posixpath: - raise UnsupportedOperation(self._unsupported_msg('resolve()')) - - def raise_error(*args): - raise OSError("Unsupported operation.") - - getcwd = raise_error - if strict or getattr(self.readlink, '_supported', True): - def lstat(path_str): - path = self.with_segments(path_str) - path._resolving = True - return path.stat(follow_symlinks=False) - - def readlink(path_str): - path = self.with_segments(path_str) - path._resolving = True - return str(path.readlink()) - else: - # If the user has *not* overridden the `readlink()` method, then - # symlinks are unsupported and (in non-strict mode) we can improve - # performance by not calling `path.lstat()`. - lstat = readlink = raise_error - - return self.with_segments(posixpath._realpath( - str(self.absolute()), strict, self.parser.sep, - getcwd=getcwd, lstat=lstat, readlink=readlink, - maxlinks=self._max_symlinks)) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2c48eeeda145d0..8c9049f15d5bf9 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1,3 +1,4 @@ +import collections import contextlib import io import os @@ -21,7 +22,7 @@ from test.support import os_helper from test.support.os_helper import TESTFN, FakePath from test.test_pathlib import test_pathlib_abc -from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows, needs_symlinks +from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows try: import fcntl @@ -55,6 +56,13 @@ def new_test(self): self.cls.replace = old_replace return new_test + +_tests_needing_symlinks = set() +def needs_symlinks(fn): + """Decorator that marks a test as requiring a path class that supports symlinks.""" + _tests_needing_symlinks.add(fn.__name__) + return fn + # # Tests for the pure classes. # @@ -533,6 +541,9 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest): can_symlink = os_helper.can_symlink() def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') super().setUp() os.chmod(self.parser.join(self.base, 'dirE'), 0) @@ -693,6 +704,34 @@ def test_copy_file_preserve_metadata(self): if hasattr(source_st, 'st_flags'): self.assertEqual(source_st.st_flags, target_st.st_flags) + @needs_symlinks + def test_copy_file_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + + @needs_symlinks + def test_copy_file_to_existing_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + @os_helper.skip_unless_xattr def test_copy_file_preserve_metadata_xattrs(self): base = self.cls(self.base) @@ -702,6 +741,118 @@ def test_copy_file_preserve_metadata_xattrs(self): source.copy(target, preserve_metadata=True) self.assertEqual(os.getxattr(target, b'user.foo'), b'42') + @needs_symlinks + def test_copy_symlink_follow_symlinks_true(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertFalse(target.is_symlink()) + self.assertEqual(source.read_text(), target.read_text()) + + @needs_symlinks + def test_copy_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.copy, source) + + @needs_symlinks + def test_copy_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_directory_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, source, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_into_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'linkB' / 'copyB' + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + self.assertFalse(target.exists()) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC') + target.symlink_to(base / 'fileA') + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC' / 'dirD') + target.symlink_to(base / 'dirC') + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'source' + target = base / 'target' + + source.mkdir() + source.joinpath('link').symlink_to('nonexistent') + + self.assertRaises(FileNotFoundError, source.copy, target) + + target2 = base / 'target2' + result = source.copy(target2, follow_symlinks=False) + self.assertEqual(result, target2) + self.assertTrue(target2.joinpath('link').is_symlink()) + self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) + @needs_symlinks def test_copy_link_preserve_metadata(self): base = self.cls(self.base) @@ -801,6 +952,54 @@ def test_copy_dir_preserve_metadata_xattrs(self): target_file = target.joinpath('dirD', 'fileD') self.assertEqual(os.getxattr(target_file, b'user.foo'), b'42') + @needs_symlinks + def test_move_file_symlink(self): + base = self.cls(self.base) + source = base / 'linkA' + source_readlink = source.readlink() + target = base / 'linkA_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def test_move_file_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dir_symlink(self): + base = self.cls(self.base) + source = base / 'linkB' + source_readlink = source.readlink() + target = base / 'linkB_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def test_move_dir_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'brokenLink' + source_readlink = source.readlink() + target = base / 'brokenLink_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + @patch_replace def test_move_file_other_fs(self): self.test_move_file() @@ -858,9 +1057,41 @@ def test_move_into_other_os(self): def test_move_into_empty_name_other_os(self): self.test_move_into_empty_name() + @needs_symlinks + def test_complex_symlinks_absolute(self): + self._check_complex_symlinks(self.base) + + @needs_symlinks + def test_complex_symlinks_relative(self): + self._check_complex_symlinks('.') + + @needs_symlinks + def test_complex_symlinks_relative_dot_dot(self): + self._check_complex_symlinks(self.parser.join('dirA', '..')) + def _check_complex_symlinks(self, link0_target): - super()._check_complex_symlinks(link0_target) + # Test solving a non-looping chain of symlinks (issue #19887). + parser = self.parser P = self.cls(self.base) + P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) + P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) + P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) + P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) + + # Resolve absolute paths. + p = (P / 'link0').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link1').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link2').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link3').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + # Resolve relative paths. old_path = os.getcwd() os.chdir(self.base) @@ -880,6 +1111,118 @@ def _check_complex_symlinks(self, link0_target): finally: os.chdir(old_path) + def _check_resolve(self, p, expected, strict=True): + q = p.resolve(strict) + self.assertEqual(q, expected) + + # This can be used to check both relative and absolute resolutions. + _check_resolve_relative = _check_resolve_absolute = _check_resolve + + @needs_symlinks + def test_resolve_common(self): + P = self.cls + p = P(self.base, 'foo') + with self.assertRaises(OSError) as cm: + p.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOENT) + # Non-strict + parser = self.parser + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo')) + p = P(self.base, 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo', 'in', 'spam')) + p = P(self.base, '..', 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) + # These are all relative symlinks. + p = P(self.base, 'dirB', 'fileB') + self._check_resolve_relative(p, p) + p = P(self.base, 'linkA') + self._check_resolve_relative(p, P(self.base, 'fileA')) + p = P(self.base, 'dirA', 'linkC', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + p = P(self.base, 'dirB', 'linkD', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', + 'spam'), False) + p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', + 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + # Now create absolute symlinks. + d = self.tempdir() + P(self.base, 'dirA', 'linkX').symlink_to(d) + P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) + p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') + self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), + False) + p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + + @needs_symlinks + def test_resolve_dot(self): + # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ + parser = self.parser + p = self.cls(self.base) + p.joinpath('0').symlink_to('.', target_is_directory=True) + p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) + p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) + q = p / '2' + self.assertEqual(q.resolve(strict=True), p) + r = q / '3' / '4' + self.assertRaises(FileNotFoundError, r.resolve, strict=True) + # Non-strict + self.assertEqual(r.resolve(strict=False), p / '3' / '4') + + def _check_symlink_loop(self, *args): + path = self.cls(*args) + with self.assertRaises(OSError) as cm: + path.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ELOOP) + + @needs_posix + @needs_symlinks + def test_resolve_loop(self): + # Loops with relative symlinks. + self.cls(self.base, 'linkX').symlink_to('linkX/inside') + self._check_symlink_loop(self.base, 'linkX') + self.cls(self.base, 'linkY').symlink_to('linkY') + self._check_symlink_loop(self.base, 'linkY') + self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') + self._check_symlink_loop(self.base, 'linkZ') + # Non-strict + p = self.cls(self.base, 'linkZ', 'foo') + self.assertEqual(p.resolve(strict=False), p) + # Loops with absolute symlinks. + self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) + self._check_symlink_loop(self.base, 'linkU') + self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) + self._check_symlink_loop(self.base, 'linkV') + self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) + self._check_symlink_loop(self.base, 'linkW') + # Non-strict + q = self.cls(self.base, 'linkW', 'foo') + self.assertEqual(q.resolve(strict=False), q) + def test_resolve_nonexist_relative_issue38671(self): p = self.cls('non', 'exist') @@ -890,6 +1233,24 @@ def test_resolve_nonexist_relative_issue38671(self): finally: os.chdir(old_cwd) + @needs_symlinks + def test_readlink(self): + P = self.cls(self.base) + self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) + self.assertEqual((P / 'brokenLink').readlink(), + self.cls('non-existing')) + self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) + self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) + with self.assertRaises(OSError): + (P / 'fileA').readlink() + + @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") + def test_readlink_unsupported(self): + P = self.cls(self.base) + p = P / 'fileA' + with self.assertRaises(pathlib.UnsupportedOperation): + q.readlink(p) + @os_helper.skip_unless_working_chmod def test_chmod(self): p = self.cls(self.base) / 'fileA' @@ -991,6 +1352,41 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) self.assertEqual(expected_name, link.group(follow_symlinks=False)) + @needs_symlinks + def test_delete_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir_ = tmp / 'dir' + dir_.mkdir() + link = tmp / 'link' + link.symlink_to(dir_) + link._delete() + self.assertTrue(dir_.exists()) + self.assertFalse(link.exists(follow_symlinks=False)) + + @needs_symlinks + def test_delete_inner_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir2 = dir1 / 'dir2' + dir3 = tmp / 'dir3' + for d in dir1, dir2, dir3: + d.mkdir() + file1 = tmp / 'file1' + file1.write_text('foo') + link1 = dir1 / 'link1' + link1.symlink_to(dir2) + link2 = dir1 / 'link2' + link2.symlink_to(dir3) + link3 = dir1 / 'link3' + link3.symlink_to(file1) + # make sure symlinks are removed but not followed + dir1._delete() + self.assertFalse(dir1.exists()) + self.assertTrue(dir3.exists()) + self.assertTrue(file1.exists()) + @unittest.skipIf(sys.platform[:6] == 'cygwin', "This test can't be run on Cygwin (issue #1071513).") @os_helper.skip_if_dac_override @@ -1354,6 +1750,12 @@ def test_symlink_to_unsupported(self): with self.assertRaises(pathlib.UnsupportedOperation): q.symlink_to(p) + @needs_symlinks + def test_stat_no_follow_symlinks(self): + p = self.cls(self.base) / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.stat(follow_symlinks=False)) + @needs_symlinks def test_lstat(self): p = self.cls(self.base)/ 'linkA' @@ -1433,6 +1835,15 @@ def test_passing_kwargs_errors(self): with self.assertRaises(TypeError): self.cls(foo="bar") + @needs_symlinks + def test_iterdir_symlink(self): + # __iter__ on a symlink to a directory. + P = self.cls + p = P(self.base, 'linkB') + paths = set(p.iterdir()) + expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } + self.assertEqual(paths, expected) + def test_glob_empty_pattern(self): p = self.cls('') with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): @@ -1493,6 +1904,25 @@ def test_glob_dot(self): self.assertEqual( set(P('.').glob('**/*/*')), {P("dirD/fileD")}) + # See https://github.com/WebAssembly/wasi-filesystem/issues/26 + @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") + def test_glob_dotdot(self): + # ".." is not special in globs. + P = self.cls + p = P(self.base) + self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) + self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) + self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) + self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) + self.assertEqual(set(p.glob("dirA/../file*/..")), set()) + self.assertEqual(set(p.glob("../xyzzy")), set()) + if self.cls.parser is posixpath: + self.assertEqual(set(p.glob("xyzzy/..")), set()) + else: + # ".." segments are normalized first on Windows, so this path is stat()able. + self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) + self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) + def test_glob_inaccessible(self): P = self.cls p = P(self.base, "mydir1", "mydir2") @@ -1508,6 +1938,124 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(P(pattern)))) self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) + @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") + def test_glob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. + self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) + _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) + _check(p, "dir*/**", [ + "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", + "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", + "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE/"]) + _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/"]) + _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", + "dirB/linkD/..", "dirA/linkC/linkD/..", + "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", [ + "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", + "dirB/linkD/", "dirB/linkD/fileB", + "dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) + _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", + "dirB/linkD/..", "dirC/dirD/.."]) + _check(p, "dir*/**/fileC", ["dirC/fileC"]) + _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "*/dirD/**/", ["dirC/dirD/"]) + + @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") + def test_rglob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. + self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", + "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) + _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) + + p = P(self.base, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD/"]) + _check(p, "", ["dirC/", "dirC/dirD/"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + @needs_symlinks + def test_rglob_symlink_loop(self): + # Don't get fooled by symlink loops (Issue #26012). + P = self.cls + p = P(self.base) + given = set(p.rglob('*', recurse_symlinks=False)) + expect = {'brokenLink', + 'dirA', 'dirA/linkC', + 'dirB', 'dirB/fileB', 'dirB/linkD', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', + 'dirE', + 'fileA', + 'linkA', + 'linkB', + 'brokenLinkLoop', + } + self.assertEqual(given, {p / x for x in expect}) + + @needs_symlinks + def test_glob_permissions(self): + # See bpo-38894 + P = self.cls + base = P(self.base) / 'permissions' + base.mkdir() + + for i in range(100): + link = base / f"link{i}" + if i % 2: + link.symlink_to(P(self.base, "dirE", "nonexistent")) + else: + link.symlink_to(P(self.base, "dirC"), target_is_directory=True) + + self.assertEqual(len(set(base.glob("*"))), 100) + self.assertEqual(len(set(base.glob("*/"))), 50) + self.assertEqual(len(set(base.glob("*/fileC"))), 50) + self.assertEqual(len(set(base.glob("*/file*"))), 50) + + @needs_symlinks + def test_glob_long_symlink(self): + # See gh-87695 + base = self.cls(self.base) / 'long_symlink' + base.mkdir() + bad_link = base / 'bad_link' + bad_link.symlink_to("bad" * 200) + self.assertEqual(sorted(base.glob('**/*')), [bad_link]) + @needs_posix def test_absolute_posix(self): P = self.cls @@ -1822,6 +2370,9 @@ class PathWalkTest(test_pathlib_abc.DummyPathWalkTest): can_symlink = PathTest.can_symlink def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') super().setUp() sub21_path= self.sub2_path / "SUB21" tmp5_path = sub21_path / "tmp3" @@ -1903,6 +2454,37 @@ def test_walk_above_recursion_limit(self): list(base.walk()) list(base.walk(top_down=False)) + @needs_symlinks + def test_walk_follow_symlinks(self): + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with follow_symlinks=True") + + @needs_symlinks + def test_walk_symlink_location(self): + # Tests whether symlinks end up in filenames or dirnames depending + # on the `follow_symlinks` argument. + walk_it = self.walk_path.walk(follow_symlinks=False) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", files) + break + else: + self.fail("symlink not found") + + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", dirs) + break + else: + self.fail("symlink not found") + @unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') class PosixPathTest(PathTest, PurePosixPathTest): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 00153e3f5e997e..bf9ae6cc8a2433 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -8,13 +8,11 @@ from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase import posixpath -from test.support import is_wasi, is_emscripten from test.support.os_helper import TESTFN _tests_needing_posix = set() _tests_needing_windows = set() -_tests_needing_symlinks = set() def needs_posix(fn): @@ -27,11 +25,6 @@ def needs_windows(fn): _tests_needing_windows.add(fn.__name__) return fn -def needs_symlinks(fn): - """Decorator that marks a test as requiring a path class that supports symlinks.""" - _tests_needing_symlinks.add(fn.__name__) - return fn - class UnsupportedOperationTest(unittest.TestCase): def test_is_notimplemented(self): @@ -1369,7 +1362,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.glob, '*') self.assertRaises(e, p.rglob, '*') self.assertRaises(e, lambda: list(p.walk())) - self.assertRaises(e, p.absolute) self.assertRaises(e, p.expanduser) self.assertRaises(e, p.readlink) self.assertRaises(e, p.symlink_to, 'foo') @@ -1425,7 +1417,6 @@ class DummyPath(PathBase): _files = {} _directories = {} - _symlinks = {} def __eq__(self, other): if not isinstance(other, DummyPath): @@ -1439,16 +1430,11 @@ def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) def stat(self, *, follow_symlinks=True): - if follow_symlinks or self.name in ('', '.', '..'): - path = str(self.resolve(strict=True)) - else: - path = str(self.parent.resolve(strict=True) / self.name) + path = str(self).rstrip('/') if path in self._files: st_mode = stat.S_IFREG elif path in self._directories: st_mode = stat.S_IFDIR - elif path in self._symlinks: - st_mode = stat.S_IFLNK else: raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) return DummyPathStatResult(st_mode, hash(str(self)), 0, 0, 0, 0, 0, 0, 0, 0) @@ -1457,10 +1443,7 @@ def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): if buffering != -1 and not (buffering == 0 and 'b' in mode): raise NotImplementedError - path_obj = self.resolve() - path = str(path_obj) - name = path_obj.name - parent = str(path_obj.parent) + path = str(self) if path in self._directories: raise IsADirectoryError(errno.EISDIR, "Is a directory", path) @@ -1471,6 +1454,7 @@ def open(self, mode='r', buffering=-1, encoding=None, raise FileNotFoundError(errno.ENOENT, "File not found", path) stream = io.BytesIO(self._files[path]) elif mode == 'w': + parent, name = posixpath.split(path) if parent not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", parent) stream = DummyPathIO(self._files, path) @@ -1483,7 +1467,7 @@ def open(self, mode='r', buffering=-1, encoding=None, return stream def iterdir(self): - path = str(self.resolve()) + path = str(self).rstrip('/') if path in self._files: raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) elif path in self._directories: @@ -1492,9 +1476,9 @@ def iterdir(self): raise FileNotFoundError(errno.ENOENT, "File not found", path) def mkdir(self, mode=0o777, parents=False, exist_ok=False): - path = str(self.parent.resolve() / self.name) - parent = str(self.parent.resolve()) - if path in self._directories or path in self._symlinks: + path = str(self) + parent = str(self.parent) + if path in self._directories: if exist_ok: return else: @@ -1510,33 +1494,28 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): self.mkdir(mode, parents=False, exist_ok=exist_ok) def unlink(self, missing_ok=False): - path_obj = self.parent.resolve(strict=True) / self.name - path = str(path_obj) - name = path_obj.name - parent = str(path_obj.parent) + path = str(self) + name = self.name + parent = str(self.parent) if path in self._directories: raise IsADirectoryError(errno.EISDIR, "Is a directory", path) elif path in self._files: self._directories[parent].remove(name) del self._files[path] - elif path in self._symlinks: - self._directories[parent].remove(name) - del self._symlinks[path] elif not missing_ok: raise FileNotFoundError(errno.ENOENT, "File not found", path) def rmdir(self): - path_obj = self.parent.resolve(strict=True) / self.name - path = str(path_obj) - if path in self._files or path in self._symlinks: + path = str(self) + if path in self._files: raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) elif path not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", path) elif self._directories[path]: raise OSError(errno.ENOTEMPTY, "Directory not empty", path) else: - name = path_obj.name - parent = str(path_obj.parent) + name = self.name + parent = str(self.parent) self._directories[parent].remove(name) del self._directories[path] @@ -1569,9 +1548,6 @@ class DummyPathTest(DummyPurePathTest): def setUp(self): super().setUp() - name = self.id().split('.')[-1] - if name in _tests_needing_symlinks and not self.can_symlink: - self.skipTest('requires symlinks') parser = self.cls.parser p = self.cls(self.base) p.mkdir(parents=True) @@ -1604,7 +1580,6 @@ def tearDown(self): cls = self.cls cls._files.clear() cls._directories.clear() - cls._symlinks.clear() def tempdir(self): path = self.cls(self.base).with_name('tmp-dirD') @@ -1730,101 +1705,6 @@ def test_copy_file(self): self.assertTrue(target.exists()) self.assertEqual(source.read_text(), target.read_text()) - @needs_symlinks - def test_copy_symlink_follow_symlinks_true(self): - base = self.cls(self.base) - source = base / 'linkA' - target = base / 'copyA' - result = source.copy(target) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertFalse(target.is_symlink()) - self.assertEqual(source.read_text(), target.read_text()) - - @needs_symlinks - def test_copy_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkA' - target = base / 'copyA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - - @needs_symlinks - def test_copy_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkA' - self.assertRaises(OSError, source.copy, source) - - @needs_symlinks - def test_copy_symlink_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'fileA') - target.symlink_to(base / 'dirC') - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_symlink_to_existing_directory_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'fileA') - target.symlink_to(base / 'dirC') - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkB' - target = base / 'copyA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - - @needs_symlinks - def test_copy_directory_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - self.assertRaises(OSError, source.copy, source) - self.assertRaises(OSError, source.copy, source, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_into_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - target = base / 'linkB' / 'copyB' - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - self.assertFalse(target.exists()) - - @needs_symlinks - def test_copy_directory_symlink_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'dirC') - target.symlink_to(base / 'fileA') - self.assertRaises(FileExistsError, source.copy, target) - self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_to_existing_directory_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'dirC' / 'dirD') - target.symlink_to(base / 'dirC') - self.assertRaises(FileExistsError, source.copy, target) - self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) - def test_copy_file_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' @@ -1840,34 +1720,6 @@ def test_copy_file_to_existing_directory(self): target = base / 'dirA' self.assertRaises(OSError, source.copy, target) - @needs_symlinks - def test_copy_file_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'dirB' / 'fileB' - target = base / 'linkA' - real_target = base / 'fileA' - result = source.copy(target) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertTrue(real_target.exists()) - self.assertFalse(real_target.is_symlink()) - self.assertEqual(source.read_text(), real_target.read_text()) - - @needs_symlinks - def test_copy_file_to_existing_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'dirB' / 'fileB' - target = base / 'linkA' - real_target = base / 'fileA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertTrue(real_target.exists()) - self.assertFalse(real_target.is_symlink()) - self.assertEqual(source.read_text(), real_target.read_text()) - def test_copy_file_empty(self): base = self.cls(self.base) source = base / 'empty' @@ -1985,23 +1837,6 @@ def test_copy_dir_into_itself(self): self.assertRaises(OSError, source.copy, target, follow_symlinks=False) self.assertFalse(target.exists()) - @needs_symlinks - def test_copy_dangling_symlink(self): - base = self.cls(self.base) - source = base / 'source' - target = base / 'target' - - source.mkdir() - source.joinpath('link').symlink_to('nonexistent') - - self.assertRaises(FileNotFoundError, source.copy, target) - - target2 = base / 'target2' - result = source.copy(target2, follow_symlinks=False) - self.assertEqual(result, target2) - self.assertTrue(target2.joinpath('link').is_symlink()) - self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) - def test_copy_into(self): base = self.cls(self.base) source = base / 'fileA' @@ -2087,54 +1922,6 @@ def test_move_dir_into_itself(self): self.assertTrue(source.exists()) self.assertFalse(target.exists()) - @needs_symlinks - def test_move_file_symlink(self): - base = self.cls(self.base) - source = base / 'linkA' - source_readlink = source.readlink() - target = base / 'linkA_moved' - result = source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - - @needs_symlinks - def test_move_file_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkA' - self.assertRaises(OSError, source.move, source) - - @needs_symlinks - def test_move_dir_symlink(self): - base = self.cls(self.base) - source = base / 'linkB' - source_readlink = source.readlink() - target = base / 'linkB_moved' - result = source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - - @needs_symlinks - def test_move_dir_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - self.assertRaises(OSError, source.move, source) - - @needs_symlinks - def test_move_dangling_symlink(self): - base = self.cls(self.base) - source = base / 'brokenLink' - source_readlink = source.readlink() - target = base / 'brokenLink_moved' - result = source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - def test_move_into(self): base = self.cls(self.base) source = base / 'fileA' @@ -2161,15 +1948,6 @@ def test_iterdir(self): expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] self.assertEqual(paths, { P(self.base, q) for q in expected }) - @needs_symlinks - def test_iterdir_symlink(self): - # __iter__ on a symlink to a directory. - P = self.cls - p = P(self.base, 'linkB') - paths = set(p.iterdir()) - expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } - self.assertEqual(paths, expected) - def test_iterdir_nodir(self): # __iter__ on something that is not a directory. p = self.cls(self.base, 'fileA') @@ -2196,7 +1974,6 @@ def test_scandir(self): if entry.name != 'brokenLinkLoop': self.assertEqual(entry.is_dir(), child.is_dir()) - def test_glob_common(self): def _check(glob, expected): self.assertEqual(set(glob), { P(self.base, q) for q in expected }) @@ -2250,8 +2027,6 @@ def test_glob_empty_pattern(self): P = self.cls p = P(self.base) self.assertEqual(list(p.glob("")), [p]) - self.assertEqual(list(p.glob(".")), [p / "."]) - self.assertEqual(list(p.glob("./")), [p / "./"]) def test_glob_case_sensitive(self): P = self.cls @@ -2265,44 +2040,6 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", True, []) _check(path, "dirb/file*", False, ["dirB/fileB"]) - @needs_symlinks - @unittest.skipIf(is_emscripten, "Hangs") - def test_glob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.glob(glob, recurse_symlinks=True) - if path.parts.count("linkD") <= 1} # exclude symlink loop. - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", []) - _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check(p, "*A", ["dirA", "fileA", "linkA"]) - _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) - _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) - _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) - _check(p, "dir*/**", [ - "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", - "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", - "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", - "dirE/"]) - _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", - "dirB/linkD/..", "dirA/linkC/linkD/..", - "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**", [ - "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", - "dirB/linkD/", "dirB/linkD/fileB", - "dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) - _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", - "dirB/linkD/..", "dirC/dirD/.."]) - _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) - _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "*/dirD/**/", ["dirC/dirD/"]) - def test_rglob_recurse_symlinks_false(self): def _check(path, glob, expected): actual = set(path.rglob(glob, recurse_symlinks=False)) @@ -2361,252 +2098,6 @@ def test_rglob_windows(self): self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) - @needs_symlinks - @unittest.skipIf(is_emscripten, "Hangs") - def test_rglob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.rglob(glob, recurse_symlinks=True) - if path.parts.count("linkD") <= 1} # exclude symlink loop. - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", - "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) - _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) - - p = P(self.base, "dirC") - _check(p, "*", ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD/"]) - _check(p, "", ["dirC/", "dirC/dirD/"]) - # gh-91616, a re module regression - _check(p, "*.txt", ["dirC/novel.txt"]) - _check(p, "*.*", ["dirC/novel.txt"]) - - @needs_symlinks - def test_rglob_symlink_loop(self): - # Don't get fooled by symlink loops (Issue #26012). - P = self.cls - p = P(self.base) - given = set(p.rglob('*', recurse_symlinks=False)) - expect = {'brokenLink', - 'dirA', 'dirA/linkC', - 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', - 'dirC/fileC', 'dirC/novel.txt', - 'dirE', - 'fileA', - 'linkA', - 'linkB', - 'brokenLinkLoop', - } - self.assertEqual(given, {p / x for x in expect}) - - # See https://github.com/WebAssembly/wasi-filesystem/issues/26 - @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") - def test_glob_dotdot(self): - # ".." is not special in globs. - P = self.cls - p = P(self.base) - self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) - self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) - self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) - self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) - self.assertEqual(set(p.glob("dirA/../file*/..")), set()) - self.assertEqual(set(p.glob("../xyzzy")), set()) - if self.cls.parser is posixpath: - self.assertEqual(set(p.glob("xyzzy/..")), set()) - else: - # ".." segments are normalized first on Windows, so this path is stat()able. - self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) - self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) - - @needs_symlinks - def test_glob_permissions(self): - # See bpo-38894 - P = self.cls - base = P(self.base) / 'permissions' - base.mkdir() - - for i in range(100): - link = base / f"link{i}" - if i % 2: - link.symlink_to(P(self.base, "dirE", "nonexistent")) - else: - link.symlink_to(P(self.base, "dirC"), target_is_directory=True) - - self.assertEqual(len(set(base.glob("*"))), 100) - self.assertEqual(len(set(base.glob("*/"))), 50) - self.assertEqual(len(set(base.glob("*/fileC"))), 50) - self.assertEqual(len(set(base.glob("*/file*"))), 50) - - @needs_symlinks - def test_glob_long_symlink(self): - # See gh-87695 - base = self.cls(self.base) / 'long_symlink' - base.mkdir() - bad_link = base / 'bad_link' - bad_link.symlink_to("bad" * 200) - self.assertEqual(sorted(base.glob('**/*')), [bad_link]) - - @needs_posix - def test_absolute_posix(self): - P = self.cls - # The default implementation uses '/' as the current directory - self.assertEqual(str(P('').absolute()), '/') - self.assertEqual(str(P('a').absolute()), '/a') - self.assertEqual(str(P('a/b').absolute()), '/a/b') - - self.assertEqual(str(P('/').absolute()), '/') - self.assertEqual(str(P('/a').absolute()), '/a') - self.assertEqual(str(P('/a/b').absolute()), '/a/b') - - # '//'-prefixed absolute path (supported by POSIX). - self.assertEqual(str(P('//').absolute()), '//') - self.assertEqual(str(P('//a').absolute()), '//a') - self.assertEqual(str(P('//a/b').absolute()), '//a/b') - - @needs_symlinks - def test_readlink(self): - P = self.cls(self.base) - self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) - self.assertEqual((P / 'brokenLink').readlink(), - self.cls('non-existing')) - self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) - self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) - with self.assertRaises(OSError): - (P / 'fileA').readlink() - - @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") - def test_readlink_unsupported(self): - P = self.cls(self.base) - p = P / 'fileA' - with self.assertRaises(UnsupportedOperation): - q.readlink(p) - - def _check_resolve(self, p, expected, strict=True): - q = p.resolve(strict) - self.assertEqual(q, expected) - - # This can be used to check both relative and absolute resolutions. - _check_resolve_relative = _check_resolve_absolute = _check_resolve - - @needs_symlinks - def test_resolve_common(self): - P = self.cls - p = P(self.base, 'foo') - with self.assertRaises(OSError) as cm: - p.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOENT) - # Non-strict - parser = self.parser - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo')) - p = P(self.base, 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo', 'in', 'spam')) - p = P(self.base, '..', 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) - # These are all relative symlinks. - p = P(self.base, 'dirB', 'fileB') - self._check_resolve_relative(p, p) - p = P(self.base, 'linkA') - self._check_resolve_relative(p, P(self.base, 'fileA')) - p = P(self.base, 'dirA', 'linkC', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - p = P(self.base, 'dirB', 'linkD', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', - 'spam'), False) - p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. - self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', - 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. - self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - # Now create absolute symlinks. - d = self.tempdir() - P(self.base, 'dirA', 'linkX').symlink_to(d) - P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) - p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') - self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), - False) - p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. - self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. - self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - - @needs_symlinks - def test_resolve_dot(self): - # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ - parser = self.parser - p = self.cls(self.base) - p.joinpath('0').symlink_to('.', target_is_directory=True) - p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) - p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) - q = p / '2' - self.assertEqual(q.resolve(strict=True), p) - r = q / '3' / '4' - self.assertRaises(FileNotFoundError, r.resolve, strict=True) - # Non-strict - self.assertEqual(r.resolve(strict=False), p / '3' / '4') - - def _check_symlink_loop(self, *args): - path = self.cls(*args) - with self.assertRaises(OSError) as cm: - path.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ELOOP) - - @needs_posix - @needs_symlinks - def test_resolve_loop(self): - # Loops with relative symlinks. - self.cls(self.base, 'linkX').symlink_to('linkX/inside') - self._check_symlink_loop(self.base, 'linkX') - self.cls(self.base, 'linkY').symlink_to('linkY') - self._check_symlink_loop(self.base, 'linkY') - self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') - self._check_symlink_loop(self.base, 'linkZ') - # Non-strict - p = self.cls(self.base, 'linkZ', 'foo') - self.assertEqual(p.resolve(strict=False), p) - # Loops with absolute symlinks. - self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) - self._check_symlink_loop(self.base, 'linkU') - self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) - self._check_symlink_loop(self.base, 'linkV') - self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) - self._check_symlink_loop(self.base, 'linkW') - # Non-strict - q = self.cls(self.base, 'linkW', 'foo') - self.assertEqual(q.resolve(strict=False), q) - def test_stat(self): statA = self.cls(self.base).joinpath('fileA').stat() statB = self.cls(self.base).joinpath('dirB', 'fileB').stat() @@ -2627,12 +2118,6 @@ def test_stat(self): self.assertEqual(statA.st_dev, statC.st_dev) # other attributes not used by pathlib. - @needs_symlinks - def test_stat_no_follow_symlinks(self): - p = self.cls(self.base) / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.stat(follow_symlinks=False)) - def test_stat_no_follow_symlinks_nosymlink(self): p = self.cls(self.base) / 'fileA' st = p.stat() @@ -2760,41 +2245,6 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\udfff').is_char_device(), False) self.assertIs((P / 'fileA\x00').is_char_device(), False) - def _check_complex_symlinks(self, link0_target): - # Test solving a non-looping chain of symlinks (issue #19887). - parser = self.parser - P = self.cls(self.base) - P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) - P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) - P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) - P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) - - # Resolve absolute paths. - p = (P / 'link0').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link1').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link2').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link3').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - - @needs_symlinks - def test_complex_symlinks_absolute(self): - self._check_complex_symlinks(self.base) - - @needs_symlinks - def test_complex_symlinks_relative(self): - self._check_complex_symlinks('.') - - @needs_symlinks - def test_complex_symlinks_relative_dot_dot(self): - self._check_complex_symlinks(self.parser.join('dirA', '..')) - def test_unlink(self): p = self.cls(self.base) / 'fileA' p.unlink() @@ -2838,41 +2288,6 @@ def test_delete_dir(self): self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'fileC').stat) self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'novel.txt').stat) - @needs_symlinks - def test_delete_symlink(self): - tmp = self.cls(self.base, 'delete') - tmp.mkdir() - dir_ = tmp / 'dir' - dir_.mkdir() - link = tmp / 'link' - link.symlink_to(dir_) - link._delete() - self.assertTrue(dir_.exists()) - self.assertFalse(link.exists(follow_symlinks=False)) - - @needs_symlinks - def test_delete_inner_symlink(self): - tmp = self.cls(self.base, 'delete') - tmp.mkdir() - dir1 = tmp / 'dir1' - dir2 = dir1 / 'dir2' - dir3 = tmp / 'dir3' - for d in dir1, dir2, dir3: - d.mkdir() - file1 = tmp / 'file1' - file1.write_text('foo') - link1 = dir1 / 'link1' - link1.symlink_to(dir2) - link2 = dir1 / 'link2' - link2.symlink_to(dir3) - link3 = dir1 / 'link3' - link3.symlink_to(file1) - # make sure symlinks are removed but not followed - dir1._delete() - self.assertFalse(dir1.exists()) - self.assertTrue(dir3.exists()) - self.assertTrue(file1.exists()) - def test_delete_missing(self): tmp = self.cls(self.base, 'delete') tmp.mkdir() @@ -2887,9 +2302,6 @@ class DummyPathWalkTest(unittest.TestCase): can_symlink = False def setUp(self): - name = self.id().split('.')[-1] - if name in _tests_needing_symlinks and not self.can_symlink: - self.skipTest('requires symlinks') # Build: # TESTFN/ # TEST1/ a file kid and two directory kids @@ -3002,70 +2414,6 @@ def test_walk_bottom_up(self): raise AssertionError(f"Unexpected path: {path}") self.assertTrue(seen_testfn) - @needs_symlinks - def test_walk_follow_symlinks(self): - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.link_path: - self.assertEqual(dirs, []) - self.assertEqual(files, ["tmp4"]) - break - else: - self.fail("Didn't follow symlink with follow_symlinks=True") - - @needs_symlinks - def test_walk_symlink_location(self): - # Tests whether symlinks end up in filenames or dirnames depending - # on the `follow_symlinks` argument. - walk_it = self.walk_path.walk(follow_symlinks=False) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", files) - break - else: - self.fail("symlink not found") - - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", dirs) - break - else: - self.fail("symlink not found") - - -class DummyPathWithSymlinks(DummyPath): - __slots__ = () - - # Reduce symlink traversal limit to make tests run faster. - _max_symlinks = 20 - - def readlink(self): - path = str(self.parent.resolve() / self.name) - if path in self._symlinks: - return self.with_segments(self._symlinks[path][0]) - elif path in self._files or path in self._directories: - raise OSError(errno.EINVAL, "Not a symlink", path) - else: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def symlink_to(self, target, target_is_directory=False): - path = str(self.parent.resolve() / self.name) - parent = str(self.parent.resolve()) - if path in self._symlinks: - raise FileExistsError(errno.EEXIST, "File exists", path) - self._directories[parent].add(self.name) - self._symlinks[path] = str(target), target_is_directory - - -class DummyPathWithSymlinksTest(DummyPathTest): - cls = DummyPathWithSymlinks - can_symlink = True - - -class DummyPathWithSymlinksWalkTest(DummyPathWalkTest): - cls = DummyPathWithSymlinks - can_symlink = True if __name__ == "__main__": From 72dca6c4eda0d63ee35a0aa619ae931ab226bef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Srinivas=20Reddy=20Thatiparthy=20=28=E0=B0=A4=E0=B0=BE?= =?UTF-8?q?=E0=B0=9F=E0=B0=BF=E0=B0=AA=E0=B0=B0=E0=B1=8D=E0=B0=A4=E0=B0=BF?= =?UTF-8?q?=20=E0=B0=B6=E0=B1=8D=E0=B0=B0=E0=B1=80=E0=B0=A8=E0=B0=BF?= =?UTF-8?q?=E0=B0=B5=E0=B0=BE=E0=B0=B8=E0=B1=8D=20=20=E0=B0=B0=E0=B1=86?= =?UTF-8?q?=E0=B0=A1=E0=B1=8D=E0=B0=A1=E0=B0=BF=29?= Date: Sat, 7 Dec 2024 15:42:45 +0530 Subject: [PATCH 13/55] gh-119786: fix typo in `InternalDocs/garbage_collector.md` (#127687) --- InternalDocs/garbage_collector.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 4761f78f3593e3..394e4ef075f55e 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -518,7 +518,7 @@ Then the above algorithm is repeated, starting from step 2. Determining how much work to do ------------------------------- -We need to do a certain amount of work to enusre that garbage is collected, +We need to do a certain amount of work to ensure that garbage is collected, but doing too much work slows down execution. To work out how much work we need to do, consider a heap with `L` live objects From 27d0d2141319d82709eb09ba20065df3e1714fab Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sat, 7 Dec 2024 16:13:49 +0000 Subject: [PATCH 14/55] Give `poplib.POP3.rpop` a proper docstring (#127370) Previously `poplib.POP3.rpop` had a "Not sure what this does" docstring, now it has been fixed. --- Lib/poplib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/poplib.py b/Lib/poplib.py index 1a1629d175b6d9..beb93a0d57cf93 100644 --- a/Lib/poplib.py +++ b/Lib/poplib.py @@ -309,7 +309,7 @@ def close(self): # optional commands: def rpop(self, user): - """Not sure what this does.""" + """Send RPOP command to access the mailbox with an alternate user.""" return self._shortcmd('RPOP %s' % user) From 79b7cab50a3292a1c01466cf0e69fb7b4e56cfb1 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 7 Dec 2024 17:58:42 +0000 Subject: [PATCH 15/55] GH-127090: Fix `urllib.response.addinfourl.url` value for opened `file:` URIs (#127091) The canonical `file:` URL (as generated by `pathname2url()`) is now used as the `url` attribute of the returned `addinfourl` object. The `addinfourl.url` attribute reflects the resolved URL for both `file:` or `http[s]:` URLs now. --- Lib/test/test_urllib.py | 11 ++++--- Lib/test/test_urllib2.py | 31 ++++++++----------- Lib/test/test_urllib2net.py | 3 +- Lib/urllib/request.py | 5 +-- ...-11-21-06-03-46.gh-issue-127090.yUYwdh.rst | 3 ++ 5 files changed, 25 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 00e46990c406ac..042d3b35b77022 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -156,7 +156,7 @@ def test_headers(self): self.assertIsInstance(self.returned_obj.headers, email.message.Message) def test_url(self): - self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.url, "file:" + self.quoted_pathname) def test_status(self): self.assertIsNone(self.returned_obj.status) @@ -165,7 +165,7 @@ def test_info(self): self.assertIsInstance(self.returned_obj.info(), email.message.Message) def test_geturl(self): - self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.geturl(), "file:" + self.quoted_pathname) def test_getcode(self): self.assertIsNone(self.returned_obj.getcode()) @@ -471,11 +471,14 @@ def test_missing_localfile(self): def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() - tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file) + parsed = urllib.parse.urlsplit(tmp_file_canon_url) + tmp_fileurl = parsed._replace(netloc='localhost').geturl() try: self.assertTrue(os.path.exists(tmp_file)) with urllib.request.urlopen(tmp_fileurl) as fobj: self.assertTrue(fobj) + self.assertEqual(fobj.url, tmp_file_canon_url) finally: os.close(fd) os.unlink(tmp_file) @@ -609,7 +612,7 @@ def tearDown(self): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) - return "file://%s" % urllib.request.pathname2url(filePath) + return "file:" + urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 99ad11cf0552eb..4a9e653515be5b 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -23,7 +23,7 @@ _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) -from urllib.parse import urlparse +from urllib.parse import urlsplit import urllib.error import http.client @@ -717,14 +717,6 @@ def test_processors(self): self.assertIsInstance(args[1], MockResponse) -def sanepathname2url(path): - urlpath = urllib.request.pathname2url(path) - if os.name == "nt" and urlpath.startswith("///"): - urlpath = urlpath[2:] - # XXX don't ask me about the mac... - return urlpath - - class HandlerTests(unittest.TestCase): def test_ftp(self): @@ -818,19 +810,22 @@ def test_file(self): o = h.parent = MockOpener() TESTFN = os_helper.TESTFN - urlpath = sanepathname2url(os.path.abspath(TESTFN)) towrite = b"hello, world\n" + canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)) + parsed = urlsplit(canonurl) + if parsed.netloc: + raise unittest.SkipTest("non-local working directory") urls = [ - "file://localhost%s" % urlpath, - "file://%s" % urlpath, - "file://%s%s" % (socket.gethostbyname('localhost'), urlpath), + canonurl, + parsed._replace(netloc='localhost').geturl(), + parsed._replace(netloc=socket.gethostbyname('localhost')).geturl(), ] try: localaddr = socket.gethostbyname(socket.gethostname()) except socket.gaierror: localaddr = '' if localaddr: - urls.append("file://%s%s" % (localaddr, urlpath)) + urls.append(parsed._replace(netloc=localaddr).geturl()) for url in urls: f = open(TESTFN, "wb") @@ -855,10 +850,10 @@ def test_file(self): self.assertEqual(headers["Content-type"], "text/plain") self.assertEqual(headers["Content-length"], "13") self.assertEqual(headers["Last-modified"], modified) - self.assertEqual(respurl, url) + self.assertEqual(respurl, canonurl) for url in [ - "file://localhost:80%s" % urlpath, + parsed._replace(netloc='localhost:80').geturl(), "file:///file_does_not_exist.txt", "file://not-a-local-host.com//dir/file.txt", "file://%s:80%s/%s" % (socket.gethostbyname('localhost'), @@ -1156,13 +1151,13 @@ def test_full_url_setter(self): r = Request('http://example.com') for url in urls: r.full_url = url - parsed = urlparse(url) + parsed = urlsplit(url) self.assertEqual(r.get_full_url(), url) # full_url setter uses splittag to split into components. # splittag sets the fragment as None while urlparse sets it to '' self.assertEqual(r.fragment or '', parsed.fragment) - self.assertEqual(urlparse(r.get_full_url()).query, parsed.query) + self.assertEqual(urlsplit(r.get_full_url()).query, parsed.query) def test_full_url_deleter(self): r = Request('http://www.example.com') diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index f0874d8d3ce463..b84290a7368c29 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -4,7 +4,6 @@ from test.support import os_helper from test.support import socket_helper from test.support import ResourceDenied -from test.test_urllib2 import sanepathname2url import os import socket @@ -151,7 +150,7 @@ def test_file(self): f.write('hi there\n') f.close() urls = [ - 'file:' + sanepathname2url(os.path.abspath(TESTFN)), + 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)), ('file:///nonsensename/etc/passwd', None, urllib.error.URLError), ] diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 1fcaa89188188d..7ef85431b718ad 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1488,10 +1488,7 @@ def open_local_file(self, req): host, port = _splitport(host) if not host or \ (not port and _safe_gethostbyname(host) in self.get_names()): - if host: - origurl = 'file://' + host + filename - else: - origurl = 'file://' + filename + origurl = 'file:' + pathname2url(localfile) return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: raise URLError(exp, exp.filename) diff --git a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst new file mode 100644 index 00000000000000..8efe563f443774 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst @@ -0,0 +1,3 @@ +Fix value of :attr:`urllib.response.addinfourl.url` for ``file:`` URLs that +express relative paths and absolute Windows paths. The canonical URL generated +by :func:`urllib.request.pathname2url` is now used. From 70154855cf698560dd9a5e484a649839cd68dc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns=20=F0=9F=87=B5=F0=9F=87=B8?= Date: Sun, 8 Dec 2024 05:57:22 +0000 Subject: [PATCH 16/55] GH-126789: fix some sysconfig data on late site initializations (#127729) --- Lib/sysconfig/__init__.py | 18 +++++++++---- Lib/test/test_sysconfig.py | 25 +++++++++++++++++++ ...-12-07-23-06-44.gh-issue-126789.4dqfV1.rst | 5 ++++ 3 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index ad86609016e478..ed7b6a335d01d4 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -173,9 +173,7 @@ def joinuser(*args): _PY_VERSION = sys.version.split()[0] _PY_VERSION_SHORT = f'{sys.version_info[0]}.{sys.version_info[1]}' _PY_VERSION_SHORT_NO_DOT = f'{sys.version_info[0]}{sys.version_info[1]}' -_PREFIX = os.path.normpath(sys.prefix) _BASE_PREFIX = os.path.normpath(sys.base_prefix) -_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) _BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix) # Mutex guarding initialization of _CONFIG_VARS. _CONFIG_VARS_LOCK = threading.RLock() @@ -473,8 +471,8 @@ def _init_config_vars(): global _CONFIG_VARS _CONFIG_VARS = {} - prefix = _PREFIX - exec_prefix = _EXEC_PREFIX + prefix = os.path.normpath(sys.prefix) + exec_prefix = os.path.normpath(sys.exec_prefix) base_prefix = _BASE_PREFIX base_exec_prefix = _BASE_EXEC_PREFIX @@ -564,9 +562,19 @@ def get_config_vars(*args): With arguments, return a list of values that result from looking up each argument in the configuration variable dictionary. """ + global _CONFIG_VARS_INITIALIZED # Avoid claiming the lock once initialization is complete. - if not _CONFIG_VARS_INITIALIZED: + if _CONFIG_VARS_INITIALIZED: + # GH-126789: If sys.prefix or sys.exec_prefix were updated, invalidate the cache. + prefix = os.path.normpath(sys.prefix) + exec_prefix = os.path.normpath(sys.exec_prefix) + if _CONFIG_VARS['prefix'] != prefix or _CONFIG_VARS['exec_prefix'] != exec_prefix: + with _CONFIG_VARS_LOCK: + _CONFIG_VARS_INITIALIZED = False + _init_config_vars() + else: + # Initialize the config_vars cache. with _CONFIG_VARS_LOCK: # Test again with the lock held to avoid races. Note that # we test _CONFIG_VARS here, not _CONFIG_VARS_INITIALIZED, diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 0df1a67ea2b720..ce504dc21af85f 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -53,6 +53,8 @@ def setUp(self): os.uname = self._get_uname # saving the environment self.name = os.name + self.prefix = sys.prefix + self.exec_prefix = sys.exec_prefix self.platform = sys.platform self.version = sys.version self._framework = sys._framework @@ -77,6 +79,8 @@ def tearDown(self): else: del os.uname os.name = self.name + sys.prefix = self.prefix + sys.exec_prefix = self.exec_prefix sys.platform = self.platform sys.version = self.version sys._framework = self._framework @@ -653,6 +657,27 @@ def test_sysconfigdata_json(self): self.assertEqual(system_config_vars, json_config_vars) + def test_sysconfig_config_vars_no_prefix_cache(self): + sys.prefix = 'prefix-AAA' + sys.exec_prefix = 'exec-prefix-AAA' + + config_vars = sysconfig.get_config_vars() + + self.assertEqual(config_vars['prefix'], sys.prefix) + self.assertEqual(config_vars['base'], sys.prefix) + self.assertEqual(config_vars['exec_prefix'], sys.exec_prefix) + self.assertEqual(config_vars['platbase'], sys.exec_prefix) + + sys.prefix = 'prefix-BBB' + sys.exec_prefix = 'exec-prefix-BBB' + + config_vars = sysconfig.get_config_vars() + + self.assertEqual(config_vars['prefix'], sys.prefix) + self.assertEqual(config_vars['base'], sys.prefix) + self.assertEqual(config_vars['exec_prefix'], sys.exec_prefix) + self.assertEqual(config_vars['platbase'], sys.exec_prefix) + class MakefileTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst b/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst new file mode 100644 index 00000000000000..417e9ac986f27a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst @@ -0,0 +1,5 @@ +Fixed :func:`sysconfig.get_config_vars`, :func:`sysconfig.get_paths`, and +siblings, returning outdated cached data if the value of :data:`sys.prefix` +or :data:`sys.exec_prefix` changes. Overwriting :data:`sys.prefix` or +:data:`sys.exec_prefix` still is discouraged, as that might break other +parts of the code. From 1503fc8f88d4903e61f76a78a30bcd581b0ee0cd Mon Sep 17 00:00:00 2001 From: Apostol Fet <90645107+ApostolFet@users.noreply.github.com> Date: Sun, 8 Dec 2024 13:05:15 +0300 Subject: [PATCH 17/55] gh-127610: Added validation for more than one var-positional and var-keyword parameters in inspect.Signature (GH-127657) --- Lib/inspect.py | 8 ++++++++ Lib/test/test_inspect/test_inspect.py | 11 +++++++++++ Misc/ACKS | 1 + .../2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst | 3 +++ 4 files changed, 23 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst diff --git a/Lib/inspect.py b/Lib/inspect.py index e3f74e9f047eaf..b7d8271f8a471f 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -2943,11 +2943,19 @@ def __init__(self, parameters=None, *, return_annotation=_empty, params = OrderedDict() top_kind = _POSITIONAL_ONLY seen_default = False + seen_var_parameters = set() for param in parameters: kind = param.kind name = param.name + if kind in (_VAR_POSITIONAL, _VAR_KEYWORD): + if kind in seen_var_parameters: + msg = f'more than one {kind.description} parameter' + raise ValueError(msg) + + seen_var_parameters.add(kind) + if kind < top_kind: msg = ( 'wrong parameter order: {} parameter before {} ' diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index a92627a4d60f68..1ecf18bf49fa7e 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -2992,6 +2992,17 @@ def test2(pod=42, /): with self.assertRaisesRegex(ValueError, 'follows default argument'): S((pkd, pk)) + second_args = args.replace(name="second_args") + with self.assertRaisesRegex(ValueError, 'more than one variadic positional parameter'): + S((args, second_args)) + + with self.assertRaisesRegex(ValueError, 'more than one variadic positional parameter'): + S((args, ko, second_args)) + + second_kwargs = kwargs.replace(name="second_kwargs") + with self.assertRaisesRegex(ValueError, 'more than one variadic keyword parameter'): + S((kwargs, second_kwargs)) + def test_signature_object_pickle(self): def foo(a, b, *, c:1={}, **kw) -> {42:'ham'}: pass foo_partial = functools.partial(foo, a=1) diff --git a/Misc/ACKS b/Misc/ACKS index 913f7c8ecf5f1e..086930666822ad 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -24,6 +24,7 @@ Eitan Adler Anton Afanasyev Ali Afshar Nitika Agarwal +Maxim Ageev Anjani Agrawal Pablo S. Blum de Aguiar Jim Ahlstrom diff --git a/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst b/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst new file mode 100644 index 00000000000000..58769029d79977 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst @@ -0,0 +1,3 @@ +Added validation for more than one var-positional or +var-keyword parameters in :class:`inspect.Signature`. +Patch by Maxim Ageev. From 8fa5ecec01337215bc7baa62c9c16488ecd854fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 14:47:22 +0100 Subject: [PATCH 18/55] gh-123378: fix post-merge typos in comments and NEWS (#127739) --- .../C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 2 +- Objects/exceptions.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst index 2cfb8b8a1e245a..7254a04f61843d 100644 --- a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst +++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -1,5 +1,5 @@ Ensure that the value of :attr:`UnicodeEncodeError.start ` -retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lie in +retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in ``[0, max(0, objlen - 1)]`` where *objlen* is the length of :attr:`UnicodeEncodeError.object `. Similar arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 124b591ee3a13f..287cbc25305964 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2712,7 +2712,7 @@ set_unicodefromstring(PyObject **attr, const char *value) * Adjust the (inclusive) 'start' value of a UnicodeError object. * * The 'start' can be negative or not, but when adjusting the value, - * we clip it in [0, max(0, objlen - 1)] but do not intepret it as + * we clip it in [0, max(0, objlen - 1)] and do not interpret it as * a relative offset. */ static inline Py_ssize_t @@ -2732,8 +2732,8 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) * Adjust the (exclusive) 'end' value of a UnicodeError object. * * The 'end' can be negative or not, but when adjusting the value, - * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but - * do not intepret it as a relative offset. + * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and + * do not interpret it as a relative offset. */ static inline Py_ssize_t unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) From 3b78409878c39d5afa344f7284b57104f7e765c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 18:31:10 +0100 Subject: [PATCH 19/55] gh-87138: convert SHA-3 object type to heap type (GH-127670) --- Modules/sha3module.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/Modules/sha3module.c b/Modules/sha3module.c index ca839dc55e0519..b13e6a9de10114 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -71,13 +71,13 @@ typedef struct { static SHA3object * newSHA3object(PyTypeObject *type) { - SHA3object *newobj; - newobj = (SHA3object *)PyObject_New(SHA3object, type); + SHA3object *newobj = PyObject_GC_New(SHA3object, type); if (newobj == NULL) { return NULL; } HASHLIB_INIT_MUTEX(newobj); + PyObject_GC_Track(newobj); return newobj; } @@ -166,15 +166,32 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity) /* Internal methods for a hash object */ +static int +SHA3_clear(SHA3object *self) +{ + if (self->hash_state != NULL) { + Hacl_Hash_SHA3_free(self->hash_state); + self->hash_state = NULL; + } + return 0; +} + static void SHA3_dealloc(SHA3object *self) { - Hacl_Hash_SHA3_free(self->hash_state); PyTypeObject *tp = Py_TYPE(self); - PyObject_Free(self); + PyObject_GC_UnTrack(self); + (void)SHA3_clear(self); + tp->tp_free(self); Py_DECREF(tp); } +static int +SHA3_traverse(PyObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + return 0; +} /* External methods for a hash object */ @@ -335,6 +352,7 @@ static PyObject * SHA3_get_capacity_bits(SHA3object *self, void *closure) { uint32_t rate = Hacl_Hash_SHA3_block_len(self->hash_state) * 8; + assert(rate <= 1600); int capacity = 1600 - rate; return PyLong_FromLong(capacity); } @@ -366,12 +384,14 @@ static PyGetSetDef SHA3_getseters[] = { #define SHA3_TYPE_SLOTS(type_slots_obj, type_doc, type_methods, type_getseters) \ static PyType_Slot type_slots_obj[] = { \ + {Py_tp_clear, SHA3_clear}, \ {Py_tp_dealloc, SHA3_dealloc}, \ + {Py_tp_traverse, SHA3_traverse}, \ {Py_tp_doc, (char*)type_doc}, \ {Py_tp_methods, type_methods}, \ {Py_tp_getset, type_getseters}, \ {Py_tp_new, py_sha3_new}, \ - {0,0} \ + {0, NULL} \ } // Using _PyType_GetModuleState() on these types is safe since they @@ -380,7 +400,8 @@ static PyGetSetDef SHA3_getseters[] = { static PyType_Spec type_spec_obj = { \ .name = "_sha3." type_name, \ .basicsize = sizeof(SHA3object), \ - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, \ + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE \ + | Py_TPFLAGS_HAVE_GC, \ .slots = type_slots \ } @@ -444,9 +465,7 @@ _SHAKE_digest(SHA3object *self, unsigned long digestlen, int hex) result = PyBytes_FromStringAndSize((const char *)digest, digestlen); } - if (digest != NULL) { - PyMem_Free(digest); - } + PyMem_Free(digest); return result; } @@ -563,7 +582,7 @@ _sha3_clear(PyObject *module) static void _sha3_free(void *module) { - _sha3_clear((PyObject *)module); + (void)_sha3_clear((PyObject *)module); } static int From 2367759212f609b8ddf3218003b3ccd8e72849ae Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 8 Dec 2024 18:01:55 +0000 Subject: [PATCH 20/55] [doc] Fix typos in `interpreter_definition.md` (#127742) --- Tools/cases_generator/interpreter_definition.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index 203286834e3e3f..d50c420307852f 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -309,7 +309,7 @@ This might become (if it was an instruction): ### More examples -For explanations see "Generating the interpreter" below.) +For explanations see "Generating the interpreter" below. ```C op ( CHECK_HAS_INSTANCE_VALUES, (owner -- owner) ) { PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); @@ -371,7 +371,7 @@ For explanations see "Generating the interpreter" below.) A _family_ maps a specializable instruction to its specializations. -Example: These opcodes all share the same instruction format): +Example: These opcodes all share the same instruction format: ```C family(load_attr) = { LOAD_ATTR, LOAD_ATTR_INSTANCE_VALUE, LOAD_SLOT }; ``` @@ -393,7 +393,7 @@ which can be easily inserted. What is more complex is ensuring the correct stack and not generating excess pops and pushes. For example, in `CHECK_HAS_INSTANCE_VALUES`, `owner` occurs in the input, so it cannot be -redefined. Thus it doesn't need to written and can be read without adjusting the stack pointer. +redefined. Thus it doesn't need to be written and can be read without adjusting the stack pointer. The C code generated for `CHECK_HAS_INSTANCE_VALUES` would look something like: ```C From 7f8ec523021427a5c1ab3ce0cdd6e4bb909f1dc5 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 8 Dec 2024 18:45:09 +0000 Subject: [PATCH 21/55] GH-127381: pathlib ABCs: remove `PathBase.unlink()` and `rmdir()` (#127736) Virtual filesystems don't always make a distinction between deleting files and empty directories, and sometimes support deleting non-empty directories in a single operation. Here we remove `PathBase.unlink()` and `rmdir()`, leaving `_delete()` as the sole deletion method, now made abstract. I hope to drop the underscore prefix later on. --- Lib/pathlib/_abc.py | 43 +++-------------- Lib/pathlib/_local.py | 16 +++++-- Lib/test/test_pathlib/test_pathlib.py | 19 ++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 56 +++++------------------ 4 files changed, 48 insertions(+), 86 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 820970fcd5889b..309eab2ff855c3 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -840,6 +840,12 @@ def copy_into(self, target_dir, *, follow_symlinks=True, dirs_exist_ok=dirs_exist_ok, preserve_metadata=preserve_metadata) + def _delete(self): + """ + Delete this file or directory (including all sub-directories). + """ + raise UnsupportedOperation(self._unsupported_msg('_delete()')) + def move(self, target): """ Recursively move this file or directory tree to the given destination. @@ -874,43 +880,6 @@ def lchmod(self, mode): """ self.chmod(mode, follow_symlinks=False) - def unlink(self, missing_ok=False): - """ - Remove this file or link. - If the path is a directory, use rmdir() instead. - """ - raise UnsupportedOperation(self._unsupported_msg('unlink()')) - - def rmdir(self): - """ - Remove this directory. The directory must be empty. - """ - raise UnsupportedOperation(self._unsupported_msg('rmdir()')) - - def _delete(self): - """ - Delete this file or directory (including all sub-directories). - """ - if self.is_symlink() or self.is_junction(): - self.unlink() - elif self.is_dir(): - self._rmtree() - else: - self.unlink() - - def _rmtree(self): - def on_error(err): - raise err - results = self.walk( - on_error=on_error, - top_down=False, # So we rmdir() empty directories. - follow_symlinks=False) - for dirpath, _, filenames in results: - for filename in filenames: - filepath = dirpath / filename - filepath.unlink() - dirpath.rmdir() - def owner(self, *, follow_symlinks=True): """ Return the login name of the file owner. diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 250bc12956f5bc..f87069ce70a2de 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -846,10 +846,18 @@ def rmdir(self): """ os.rmdir(self) - def _rmtree(self): - # Lazy import to improve module import time - import shutil - shutil.rmtree(self) + def _delete(self): + """ + Delete this file or directory (including all sub-directories). + """ + if self.is_symlink() or self.is_junction(): + self.unlink() + elif self.is_dir(): + # Lazy import to improve module import time + import shutil + shutil.rmtree(self) + else: + self.unlink() def rename(self, target): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 8c9049f15d5bf9..ce0f4748c860b1 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1352,6 +1352,25 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) self.assertEqual(expected_name, link.group(follow_symlinks=False)) + def test_unlink(self): + p = self.cls(self.base) / 'fileA' + p.unlink() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + + def test_unlink_missing_ok(self): + p = self.cls(self.base) / 'fileAAA' + self.assertFileNotFound(p.unlink) + p.unlink(missing_ok=True) + + def test_rmdir(self): + p = self.cls(self.base) / 'dirA' + for q in p.iterdir(): + q.unlink() + p.rmdir() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + @needs_symlinks def test_delete_symlink(self): tmp = self.cls(self.base, 'delete') diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index bf9ae6cc8a2433..675abf30a9f13c 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1370,8 +1370,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.touch) self.assertRaises(e, p.chmod, 0o755) self.assertRaises(e, p.lchmod, 0o755) - self.assertRaises(e, p.unlink) - self.assertRaises(e, p.rmdir) self.assertRaises(e, p.owner) self.assertRaises(e, p.group) self.assertRaises(e, p.as_uri) @@ -1493,31 +1491,18 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): self.parent.mkdir(parents=True, exist_ok=True) self.mkdir(mode, parents=False, exist_ok=exist_ok) - def unlink(self, missing_ok=False): - path = str(self) - name = self.name - parent = str(self.parent) - if path in self._directories: - raise IsADirectoryError(errno.EISDIR, "Is a directory", path) - elif path in self._files: - self._directories[parent].remove(name) - del self._files[path] - elif not missing_ok: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def rmdir(self): + def _delete(self): path = str(self) if path in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) - elif path not in self._directories: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - elif self._directories[path]: - raise OSError(errno.ENOTEMPTY, "Directory not empty", path) - else: - name = self.name - parent = str(self.parent) - self._directories[parent].remove(name) + del self._files[path] + elif path in self._directories: + for name in list(self._directories[path]): + self.joinpath(name)._delete() del self._directories[path] + else: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + parent = str(self.parent) + self._directories[parent].remove(self.name) class DummyPathTest(DummyPurePathTest): @@ -2245,30 +2230,11 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\udfff').is_char_device(), False) self.assertIs((P / 'fileA\x00').is_char_device(), False) - def test_unlink(self): - p = self.cls(self.base) / 'fileA' - p.unlink() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - - def test_unlink_missing_ok(self): - p = self.cls(self.base) / 'fileAAA' - self.assertFileNotFound(p.unlink) - p.unlink(missing_ok=True) - - def test_rmdir(self): - p = self.cls(self.base) / 'dirA' - for q in p.iterdir(): - q.unlink() - p.rmdir() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - def test_delete_file(self): p = self.cls(self.base) / 'fileA' p._delete() self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) + self.assertFileNotFound(p._delete) def test_delete_dir(self): base = self.cls(self.base) @@ -2347,7 +2313,7 @@ def setUp(self): def tearDown(self): base = self.cls(self.base) - base._rmtree() + base._delete() def test_walk_topdown(self): walker = self.walk_path.walk() From a03efb533a58fd13fb0cc7f4a5c02c8406a407bd Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Sun, 8 Dec 2024 10:46:34 -0800 Subject: [PATCH 22/55] gh-127734: improve signature of `urllib.request.HTTPPasswordMgrWithPriorAuth.__init__` (#127735) improve signature of urllib.request.HTTPPasswordMgrWithPriorAuth.__init__ --- Lib/urllib/request.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 7ef85431b718ad..c5a6a18a32bba1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -876,9 +876,9 @@ def find_user_password(self, realm, authuri): class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm): - def __init__(self, *args, **kwargs): + def __init__(self): self.authenticated = {} - super().__init__(*args, **kwargs) + super().__init__() def add_password(self, realm, uri, user, passwd, is_authenticated=False): self.update_authenticated(uri, is_authenticated) From be07edf511365ce554c0535b535bb5726266a17a Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 9 Dec 2024 02:34:28 +0100 Subject: [PATCH 23/55] gh-127111: Emscripten Move link flags from `LDFLAGS_NODIST` to `LINKFORSHARED` (#127666) Corrects the usage of linking flags to avoid compilation errors related to the use of `-sEXPORTED_FUNCTIONS` when linking shared libraries. --- configure | 10 +++++----- configure.ac | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/configure b/configure index bcbab8dfcff190..57be576e3cae99 100755 --- a/configure +++ b/configure @@ -9430,14 +9430,14 @@ else $as_nop wasm_debug=no fi - as_fn_append LDFLAGS_NODIST " -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520" + as_fn_append LINKFORSHARED " -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520" as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" - as_fn_append LDFLAGS_NODIST " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" - as_fn_append LDFLAGS_NODIST " -sEXPORTED_FUNCTIONS=_main,_Py_Version" - as_fn_append LDFLAGS_NODIST " -sSTACK_SIZE=5MB" + as_fn_append LINKFORSHARED " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" + as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" + as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version" + as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" if test "x$enable_wasm_dynamic_linking" = xyes then : diff --git a/configure.ac b/configure.ac index 922a125ea9608e..bd0221481c5341 100644 --- a/configure.ac +++ b/configure.ac @@ -2325,16 +2325,16 @@ AS_CASE([$ac_sys_system], AS_VAR_IF([Py_DEBUG], [yes], [wasm_debug=yes], [wasm_debug=no]) dnl Start with 20 MB and allow to grow - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520"]) dnl map int64_t and uint64_t to JS bigint AS_VAR_APPEND([LDFLAGS_NODIST], [" -sWASM_BIGINT"]) dnl Include file system support - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sSTACK_SIZE=5MB"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sSTACK_SIZE=5MB"]) AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) From 5876063d06ec55b10793f34bfe516c10f608665c Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 9 Dec 2024 03:01:37 +0100 Subject: [PATCH 24/55] gh-127503 Don't propagate native PATH to Emscripten Python (#127633) Modifies the handling of PATH to ensure that native executables aren't picked up when running under node. --- Tools/wasm/emscripten/node_entry.mjs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/wasm/emscripten/node_entry.mjs b/Tools/wasm/emscripten/node_entry.mjs index 40ab1515cf28c1..98b8f572a7e762 100644 --- a/Tools/wasm/emscripten/node_entry.mjs +++ b/Tools/wasm/emscripten/node_entry.mjs @@ -35,11 +35,12 @@ const settings = { mountDirectories(Module); Module.FS.chdir(process.cwd()); Object.assign(Module.ENV, process.env); + delete Module.ENV.PATH; }, // Ensure that sys.executable, sys._base_executable, etc point to python.sh // not to this file. To properly handle symlinks, python.sh needs to compute // its own path. - thisProgram: process.argv[thisProgramIndex], + thisProgram: process.argv[thisProgramIndex].slice(thisProgram.length), // After python.sh come the arguments thatthe user passed to python.sh. arguments: process.argv.slice(thisProgramIndex + 1), }; From d8d12b37b5e5acb354db84b07dab8de64a6b9475 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 9 Dec 2024 03:03:11 +0100 Subject: [PATCH 25/55] gh-127503: Fix realpath handling in emscripten cli (#127632) Corrects the handling of realpath on Linux. --- Tools/wasm/emscripten/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index c998ed71309dad..6843b6fdeceb8c 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -223,7 +223,7 @@ def configure_emscripten_python(context, working_dir): if which grealpath > /dev/null; then # It has brew installed gnu core utils, use that REALPATH="grealpath -s" - elif which realpath > /dev/null && realpath --version 2&>1 | grep GNU > /dev/null; then + elif which realpath > /dev/null && realpath --version > /dev/null 2> /dev/null && realpath --version | grep GNU > /dev/null; then # realpath points to GNU realpath so use it. REALPATH="realpath -s" else From 2041a95e68ebf6d13f867e214ada28affa830669 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Mon, 9 Dec 2024 13:28:57 +0800 Subject: [PATCH 26/55] gh-126925: Modify how iOS test results are gathered (#127592) Adds a `use_system_log` config item to enable stdout/stderr redirection for Apple platforms. This log streaming is then used by a new iOS test runner script, allowing the display of test suite output at runtime. The iOS test runner script can be used by any Python project, not just the CPython test suite. --- Doc/c-api/init_config.rst | 9 + Doc/using/ios.rst | 53 ++- Doc/whatsnew/3.14.rst | 7 + Include/cpython/initconfig.h | 3 + Lib/_apple_support.py | 66 ++++ Lib/test/test_apple.py | 155 ++++++++ Lib/test/test_capi/test_config.py | 4 + Lib/test/test_embed.py | 2 + Makefile.pre.in | 26 +- ...-12-04-15-04-12.gh-issue-126821.lKCLVV.rst | 2 + ...-12-04-15-03-24.gh-issue-126925.uxAMK-.rst | 2 + Python/initconfig.c | 15 + Python/pylifecycle.c | 82 ++++ Python/stdlib_module_names.h | 1 + iOS/README.rst | 54 ++- iOS/testbed/__main__.py | 365 ++++++++++++++++++ .../iOSTestbed.xcodeproj/project.pbxproj | 2 + iOS/testbed/iOSTestbedTests/iOSTestbedTests.m | 2 + 18 files changed, 792 insertions(+), 58 deletions(-) create mode 100644 Lib/_apple_support.py create mode 100644 Lib/test/test_apple.py create mode 100644 Misc/NEWS.d/next/Library/2024-12-04-15-04-12.gh-issue-126821.lKCLVV.rst create mode 100644 Misc/NEWS.d/next/Tests/2024-12-04-15-03-24.gh-issue-126925.uxAMK-.rst create mode 100644 iOS/testbed/__main__.py diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index d6569ddcf586fa..7497bf241fb10e 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -1281,6 +1281,15 @@ PyConfig Default: ``1`` in Python config and ``0`` in isolated config. + .. c:member:: int use_system_logger + + If non-zero, ``stdout`` and ``stderr`` will be redirected to the system + log. + + Only available on macOS 10.12 and later, and on iOS. + + Default: ``0`` (don't use system log). + .. c:member:: int user_site_directory If non-zero, add the user site directory to :data:`sys.path`. diff --git a/Doc/using/ios.rst b/Doc/using/ios.rst index 4d4eb2031ee980..aa43f75ec35a6c 100644 --- a/Doc/using/ios.rst +++ b/Doc/using/ios.rst @@ -292,10 +292,12 @@ To add Python to an iOS Xcode project: 10. Add Objective C code to initialize and use a Python interpreter in embedded mode. You should ensure that: - * :c:member:`UTF-8 mode ` is *enabled*; - * :c:member:`Buffered stdio ` is *disabled*; - * :c:member:`Writing bytecode ` is *disabled*; - * :c:member:`Signal handlers ` are *enabled*; + * UTF-8 mode (:c:member:`PyPreConfig.utf8_mode`) is *enabled*; + * Buffered stdio (:c:member:`PyConfig.buffered_stdio`) is *disabled*; + * Writing bytecode (:c:member:`PyConfig.write_bytecode`) is *disabled*; + * Signal handlers (:c:member:`PyConfig.install_signal_handlers`) are *enabled*; + * System logging (:c:member:`PyConfig.use_system_logger`) is *enabled* + (optional, but strongly recommended); * ``PYTHONHOME`` for the interpreter is configured to point at the ``python`` subfolder of your app's bundle; and * The ``PYTHONPATH`` for the interpreter includes: @@ -324,6 +326,49 @@ modules in your app, some additional steps will be required: * If you're using a separate folder for third-party packages, ensure that folder is included as part of the ``PYTHONPATH`` configuration in step 10. +Testing a Python package +------------------------ + +The CPython source tree contains :source:`a testbed project ` that +is used to run the CPython test suite on the iOS simulator. This testbed can also +be used as a testbed project for running your Python library's test suite on iOS. + +After building or obtaining an iOS XCFramework (See :source:`iOS/README.rst` +for details), create a clone of the Python iOS testbed project by running: + +.. code-block:: bash + + $ python iOS/testbed clone --framework --app --app app-testbed + +You will need to modify the ``iOS/testbed`` reference to point to that +directory in the CPython source tree; any folders specified with the ``--app`` +flag will be copied into the cloned testbed project. The resulting testbed will +be created in the ``app-testbed`` folder. In this example, the ``module1`` and +``module2`` would be importable modules at runtime. If your project has +additional dependencies, they can be installed into the +``app-testbed/iOSTestbed/app_packages`` folder (using ``pip install --target +app-testbed/iOSTestbed/app_packages`` or similar). + +You can then use the ``app-testbed`` folder to run the test suite for your app, +For example, if ``module1.tests`` was the entry point to your test suite, you +could run: + +.. code-block:: bash + + $ python app-testbed run -- module1.tests + +This is the equivalent of running ``python -m module1.tests`` on a desktop +Python build. Any arguments after the ``--`` will be passed to the testbed as +if they were arguments to ``python -m`` on a desktop machine. + +You can also open the testbed project in Xcode by running: + +.. code-block:: bash + + $ open app-testbed/iOSTestbed.xcodeproj + +This will allow you to use the full Xcode suite of tools for debugging. + App Store Compliance ==================== diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b300e348679438..b71d31f9742fe0 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -245,6 +245,13 @@ Other language changes making it a :term:`generic type`. (Contributed by Brian Schubert in :gh:`126012`.) +* iOS and macOS apps can now be configured to redirect ``stdout`` and + ``stderr`` content to the system log. (Contributed by Russell Keith-Magee in + :gh:`127592`.) + +* The iOS testbed is now able to stream test output while the test is running. + The testbed can also be used to run the test suite of projects other than + CPython itself. (Contributed by Russell Keith-Magee in :gh:`127592`.) New modules =========== diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index f69c586a4f96f3..8ef19f677066c2 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -179,6 +179,9 @@ typedef struct PyConfig { int use_frozen_modules; int safe_path; int int_max_str_digits; +#ifdef __APPLE__ + int use_system_logger; +#endif int cpu_count; #ifdef Py_GIL_DISABLED diff --git a/Lib/_apple_support.py b/Lib/_apple_support.py new file mode 100644 index 00000000000000..92febdcf587070 --- /dev/null +++ b/Lib/_apple_support.py @@ -0,0 +1,66 @@ +import io +import sys + + +def init_streams(log_write, stdout_level, stderr_level): + # Redirect stdout and stderr to the Apple system log. This method is + # invoked by init_apple_streams() (initconfig.c) if config->use_system_logger + # is enabled. + sys.stdout = SystemLog(log_write, stdout_level, errors=sys.stderr.errors) + sys.stderr = SystemLog(log_write, stderr_level, errors=sys.stderr.errors) + + +class SystemLog(io.TextIOWrapper): + def __init__(self, log_write, level, **kwargs): + kwargs.setdefault("encoding", "UTF-8") + kwargs.setdefault("line_buffering", True) + super().__init__(LogStream(log_write, level), **kwargs) + + def __repr__(self): + return f"" + + def write(self, s): + if not isinstance(s, str): + raise TypeError( + f"write() argument must be str, not {type(s).__name__}") + + # In case `s` is a str subclass that writes itself to stdout or stderr + # when we call its methods, convert it to an actual str. + s = str.__str__(s) + + # We want to emit one log message per line, so split + # the string before sending it to the superclass. + for line in s.splitlines(keepends=True): + super().write(line) + + return len(s) + + +class LogStream(io.RawIOBase): + def __init__(self, log_write, level): + self.log_write = log_write + self.level = level + + def __repr__(self): + return f"" + + def writable(self): + return True + + def write(self, b): + if type(b) is not bytes: + try: + b = bytes(memoryview(b)) + except TypeError: + raise TypeError( + f"write() argument must be bytes-like, not {type(b).__name__}" + ) from None + + # Writing an empty string to the stream should have no effect. + if b: + # Encode null bytes using "modified UTF-8" to avoid truncating the + # message. This should not affect the return value, as the caller + # may be expecting it to match the length of the input. + self.log_write(self.level, b.replace(b"\x00", b"\xc0\x80")) + + return len(b) diff --git a/Lib/test/test_apple.py b/Lib/test/test_apple.py new file mode 100644 index 00000000000000..ab5296afad1d3f --- /dev/null +++ b/Lib/test/test_apple.py @@ -0,0 +1,155 @@ +import unittest +from _apple_support import SystemLog +from test.support import is_apple +from unittest.mock import Mock, call + +if not is_apple: + raise unittest.SkipTest("Apple-specific") + + +# Test redirection of stdout and stderr to the Apple system log. +class TestAppleSystemLogOutput(unittest.TestCase): + maxDiff = None + + def assert_writes(self, output): + self.assertEqual( + self.log_write.mock_calls, + [ + call(self.log_level, line) + for line in output + ] + ) + + self.log_write.reset_mock() + + def setUp(self): + self.log_write = Mock() + self.log_level = 42 + self.log = SystemLog(self.log_write, self.log_level, errors="replace") + + def test_repr(self): + self.assertEqual(repr(self.log), "") + self.assertEqual(repr(self.log.buffer), "") + + def test_log_config(self): + self.assertIs(self.log.writable(), True) + self.assertIs(self.log.readable(), False) + + self.assertEqual("UTF-8", self.log.encoding) + self.assertEqual("replace", self.log.errors) + + self.assertIs(self.log.line_buffering, True) + self.assertIs(self.log.write_through, False) + + def test_empty_str(self): + self.log.write("") + self.log.flush() + + self.assert_writes([]) + + def test_simple_str(self): + self.log.write("hello world\n") + + self.assert_writes([b"hello world\n"]) + + def test_buffered_str(self): + self.log.write("h") + self.log.write("ello") + self.log.write(" ") + self.log.write("world\n") + self.log.write("goodbye.") + self.log.flush() + + self.assert_writes([b"hello world\n", b"goodbye."]) + + def test_manual_flush(self): + self.log.write("Hello") + + self.assert_writes([]) + + self.log.write(" world\nHere for a while...\nGoodbye") + self.assert_writes([b"Hello world\n", b"Here for a while...\n"]) + + self.log.write(" world\nHello again") + self.assert_writes([b"Goodbye world\n"]) + + self.log.flush() + self.assert_writes([b"Hello again"]) + + def test_non_ascii(self): + # Spanish + self.log.write("ol\u00e9\n") + self.assert_writes([b"ol\xc3\xa9\n"]) + + # Chinese + self.log.write("\u4e2d\u6587\n") + self.assert_writes([b"\xe4\xb8\xad\xe6\x96\x87\n"]) + + # Printing Non-BMP emoji + self.log.write("\U0001f600\n") + self.assert_writes([b"\xf0\x9f\x98\x80\n"]) + + # Non-encodable surrogates are replaced + self.log.write("\ud800\udc00\n") + self.assert_writes([b"??\n"]) + + def test_modified_null(self): + # Null characters are logged using "modified UTF-8". + self.log.write("\u0000\n") + self.assert_writes([b"\xc0\x80\n"]) + self.log.write("a\u0000\n") + self.assert_writes([b"a\xc0\x80\n"]) + self.log.write("\u0000b\n") + self.assert_writes([b"\xc0\x80b\n"]) + self.log.write("a\u0000b\n") + self.assert_writes([b"a\xc0\x80b\n"]) + + def test_nonstandard_str(self): + # String subclasses are accepted, but they should be converted + # to a standard str without calling any of their methods. + class CustomStr(str): + def splitlines(self, *args, **kwargs): + raise AssertionError() + + def __len__(self): + raise AssertionError() + + def __str__(self): + raise AssertionError() + + self.log.write(CustomStr("custom\n")) + self.assert_writes([b"custom\n"]) + + def test_non_str(self): + # Non-string classes are not accepted. + for obj in [b"", b"hello", None, 42]: + with self.subTest(obj=obj): + with self.assertRaisesRegex( + TypeError, + fr"write\(\) argument must be str, not " + fr"{type(obj).__name__}" + ): + self.log.write(obj) + + def test_byteslike_in_buffer(self): + # The underlying buffer *can* accept bytes-like objects + self.log.buffer.write(bytearray(b"hello")) + self.log.flush() + + self.log.buffer.write(b"") + self.log.flush() + + self.log.buffer.write(b"goodbye") + self.log.flush() + + self.assert_writes([b"hello", b"goodbye"]) + + def test_non_byteslike_in_buffer(self): + for obj in ["hello", None, 42]: + with self.subTest(obj=obj): + with self.assertRaisesRegex( + TypeError, + fr"write\(\) argument must be bytes-like, not " + fr"{type(obj).__name__}" + ): + self.log.buffer.write(obj) diff --git a/Lib/test/test_capi/test_config.py b/Lib/test/test_capi/test_config.py index 77730ad2f32085..a3179efe4a8235 100644 --- a/Lib/test/test_capi/test_config.py +++ b/Lib/test/test_capi/test_config.py @@ -110,6 +110,10 @@ def test_config_get(self): options.extend(( ("_pystats", bool, None), )) + if support.is_apple: + options.extend(( + ("use_system_logger", bool, None), + )) for name, option_type, sys_attr in options: with self.subTest(name=name, option_type=option_type, diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 5c38b28322deb4..7110fb889f3c8e 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -649,6 +649,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): CONFIG_COMPAT.update({ 'legacy_windows_stdio': False, }) + if support.is_apple: + CONFIG_COMPAT['use_system_logger'] = False CONFIG_PYTHON = dict(CONFIG_COMPAT, _config_init=API_PYTHON, diff --git a/Makefile.pre.in b/Makefile.pre.in index dd8a3ab82eacd2..7b66802147dc3a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2146,7 +2146,6 @@ testuniversal: all # This must be run *after* a `make install` has completed the build. The # `--with-framework-name` argument *cannot* be used when configuring the build. XCFOLDER:=iOSTestbed.$(MULTIARCH).$(shell date +%s) -XCRESULT=$(XCFOLDER)/$(MULTIARCH).xcresult .PHONY: testios testios: @if test "$(MACHDEP)" != "ios"; then \ @@ -2165,29 +2164,12 @@ testios: echo "Cannot find a finalized iOS Python.framework. Have you run 'make install' to finalize the framework build?"; \ exit 1;\ fi - # Copy the testbed project into the build folder - cp -r $(srcdir)/iOS/testbed $(XCFOLDER) - # Copy the framework from the install location to the testbed project. - cp -r $(PYTHONFRAMEWORKPREFIX)/* $(XCFOLDER)/Python.xcframework/ios-arm64_x86_64-simulator - - # Run the test suite for the Xcode project, targeting the iOS simulator. - # If the suite fails, touch a file in the test folder as a marker - if ! xcodebuild test -project $(XCFOLDER)/iOSTestbed.xcodeproj -scheme "iOSTestbed" -destination "platform=iOS Simulator,name=iPhone SE (3rd Generation)" -resultBundlePath $(XCRESULT) -derivedDataPath $(XCFOLDER)/DerivedData ; then \ - touch $(XCFOLDER)/failed; \ - fi - # Regardless of success or failure, extract and print the test output - xcrun xcresulttool get --path $(XCRESULT) \ - --id $$( \ - xcrun xcresulttool get --path $(XCRESULT) --format json | \ - $(PYTHON_FOR_BUILD) -c "import sys, json; result = json.load(sys.stdin); print(result['actions']['_values'][0]['actionResult']['logRef']['id']['_value'])" \ - ) \ - --format json | \ - $(PYTHON_FOR_BUILD) -c "import sys, json; result = json.load(sys.stdin); print(result['subsections']['_values'][1]['subsections']['_values'][0]['emittedOutput']['_value'])" + # Clone the testbed project into the XCFOLDER + $(PYTHON_FOR_BUILD) $(srcdir)/iOS/testbed clone --framework $(PYTHONFRAMEWORKPREFIX) "$(XCFOLDER)" - @if test -e $(XCFOLDER)/failed ; then \ - exit 1; \ - fi + # Run the testbed project + $(PYTHON_FOR_BUILD) "$(XCFOLDER)" run -- test -uall --single-process --rerun -W # Like test, but using --slow-ci which enables all test resources and use # longer timeout. Run an optional pybuildbot.identify script to include diff --git a/Misc/NEWS.d/next/Library/2024-12-04-15-04-12.gh-issue-126821.lKCLVV.rst b/Misc/NEWS.d/next/Library/2024-12-04-15-04-12.gh-issue-126821.lKCLVV.rst new file mode 100644 index 00000000000000..677acf5baab3fa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-04-15-04-12.gh-issue-126821.lKCLVV.rst @@ -0,0 +1,2 @@ +macOS and iOS apps can now choose to redirect stdout and stderr to the +system log during interpreter configuration. diff --git a/Misc/NEWS.d/next/Tests/2024-12-04-15-03-24.gh-issue-126925.uxAMK-.rst b/Misc/NEWS.d/next/Tests/2024-12-04-15-03-24.gh-issue-126925.uxAMK-.rst new file mode 100644 index 00000000000000..fb307c7cb9bf1d --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-12-04-15-03-24.gh-issue-126925.uxAMK-.rst @@ -0,0 +1,2 @@ +iOS test results are now streamed during test execution, and the deprecated +xcresulttool is no longer used. diff --git a/Python/initconfig.c b/Python/initconfig.c index 438f8a5c1cf1ce..7851b86db1f6d0 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -168,6 +168,9 @@ static const PyConfigSpec PYCONFIG_SPEC[] = { SPEC(tracemalloc, UINT, READ_ONLY, NO_SYS), SPEC(use_frozen_modules, BOOL, READ_ONLY, NO_SYS), SPEC(use_hash_seed, BOOL, READ_ONLY, NO_SYS), +#ifdef __APPLE__ + SPEC(use_system_logger, BOOL, PUBLIC, NO_SYS), +#endif SPEC(user_site_directory, BOOL, READ_ONLY, NO_SYS), // sys.flags.no_user_site SPEC(warn_default_encoding, BOOL, READ_ONLY, NO_SYS), @@ -884,6 +887,9 @@ config_check_consistency(const PyConfig *config) assert(config->cpu_count != 0); // config->use_frozen_modules is initialized later // by _PyConfig_InitImportConfig(). +#ifdef __APPLE__ + assert(config->use_system_logger >= 0); +#endif #ifdef Py_STATS assert(config->_pystats >= 0); #endif @@ -986,6 +992,9 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->_is_python_build = 0; config->code_debug_ranges = 1; config->cpu_count = -1; +#ifdef __APPLE__ + config->use_system_logger = 0; +#endif #ifdef Py_GIL_DISABLED config->enable_gil = _PyConfig_GIL_DEFAULT; config->tlbc_enabled = 1; @@ -1015,6 +1024,9 @@ config_init_defaults(PyConfig *config) #ifdef MS_WINDOWS config->legacy_windows_stdio = 0; #endif +#ifdef __APPLE__ + config->use_system_logger = 0; +#endif } @@ -1049,6 +1061,9 @@ PyConfig_InitIsolatedConfig(PyConfig *config) #ifdef MS_WINDOWS config->legacy_windows_stdio = 0; #endif +#ifdef __APPLE__ + config->use_system_logger = 0; +#endif } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index ceb30e9f02df2c..06418123d6dd9b 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -45,7 +45,9 @@ #endif #if defined(__APPLE__) +# include # include +# include #endif #ifdef HAVE_SIGNAL_H @@ -75,6 +77,9 @@ static PyStatus init_sys_streams(PyThreadState *tstate); #ifdef __ANDROID__ static PyStatus init_android_streams(PyThreadState *tstate); #endif +#if defined(__APPLE__) +static PyStatus init_apple_streams(PyThreadState *tstate); +#endif static void wait_for_thread_shutdown(PyThreadState *tstate); static void finalize_subinterpreters(void); static void call_ll_exitfuncs(_PyRuntimeState *runtime); @@ -1257,6 +1262,14 @@ init_interp_main(PyThreadState *tstate) return status; } #endif +#if defined(__APPLE__) + if (config->use_system_logger) { + status = init_apple_streams(tstate); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } +#endif #ifdef Py_DEBUG run_presite(tstate); @@ -2933,6 +2946,75 @@ init_android_streams(PyThreadState *tstate) #endif // __ANDROID__ +#if defined(__APPLE__) + +static PyObject * +apple_log_write_impl(PyObject *self, PyObject *args) +{ + int logtype = 0; + const char *text = NULL; + if (!PyArg_ParseTuple(args, "iy", &logtype, &text)) { + return NULL; + } + + // Call the underlying Apple logging API. The os_log unified logging APIs + // were introduced in macOS 10.12, iOS 10.0, tvOS 10.0, and watchOS 3.0; + // this call is a no-op on older versions. + #if TARGET_OS_IPHONE || (TARGET_OS_OSX && MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_12) + // Pass the user-provided text through explicit %s formatting + // to avoid % literals being interpreted as a formatting directive. + os_log_with_type(OS_LOG_DEFAULT, logtype, "%s", text); + #endif + Py_RETURN_NONE; +} + + +static PyMethodDef apple_log_write_method = { + "apple_log_write", apple_log_write_impl, METH_VARARGS +}; + + +static PyStatus +init_apple_streams(PyThreadState *tstate) +{ + PyStatus status = _PyStatus_OK(); + PyObject *_apple_support = NULL; + PyObject *apple_log_write = NULL; + PyObject *result = NULL; + + _apple_support = PyImport_ImportModule("_apple_support"); + if (_apple_support == NULL) { + goto error; + } + + apple_log_write = PyCFunction_New(&apple_log_write_method, NULL); + if (apple_log_write == NULL) { + goto error; + } + + // Initialize the logging streams, sending stdout -> Default; stderr -> Error + result = PyObject_CallMethod( + _apple_support, "init_streams", "Oii", + apple_log_write, OS_LOG_TYPE_DEFAULT, OS_LOG_TYPE_ERROR); + if (result == NULL) { + goto error; + } + + goto done; + +error: + _PyErr_Print(tstate); + status = _PyStatus_ERR("failed to initialize Apple log streams"); + +done: + Py_XDECREF(result); + Py_XDECREF(apple_log_write); + Py_XDECREF(_apple_support); + return status; +} + +#endif // __APPLE__ + static void _Py_FatalError_DumpTracebacks(int fd, PyInterpreterState *interp, diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index c8cdb933bb108f..584b050fc4bb6e 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -6,6 +6,7 @@ static const char* _Py_stdlib_module_names[] = { "_abc", "_aix_support", "_android_support", +"_apple_support", "_ast", "_asyncio", "_bisect", diff --git a/iOS/README.rst b/iOS/README.rst index e33455eef8f44a..9cea98cf1abbfa 100644 --- a/iOS/README.rst +++ b/iOS/README.rst @@ -285,52 +285,42 @@ This will: * Install the Python iOS framework into the copy of the testbed project; and * Run the test suite on an "iPhone SE (3rd generation)" simulator. -While the test suite is running, Xcode does not display any console output. -After showing some Xcode build commands, the console output will print ``Testing -started``, and then appear to stop. It will remain in this state until the test -suite completes. On a 2022 M1 MacBook Pro, the test suite takes approximately 12 -minutes to run; a couple of extra minutes is required to boot and prepare the -iOS simulator. - On success, the test suite will exit and report successful completion of the -test suite. No output of the Python test suite will be displayed. - -On failure, the output of the Python test suite *will* be displayed. This will -show the details of the tests that failed. +test suite. On a 2022 M1 MacBook Pro, the test suite takes approximately 12 +minutes to run; a couple of extra minutes is required to compile the testbed +project, and then boot and prepare the iOS simulator. Debugging test failures ----------------------- -The easiest way to diagnose a single test failure is to open the testbed project -in Xcode and run the tests from there using the "Product > Test" menu item. - -To test in Xcode, you must ensure the testbed project has a copy of a compiled -framework. If you've configured your build with the default install location of -``iOS/Frameworks``, you can copy from that location into the test project. To -test on an ARM64 simulator, run:: - - $ rm -rf iOS/testbed/Python.xcframework/ios-arm64_x86_64-simulator/* - $ cp -r iOS/Frameworks/arm64-iphonesimulator/* iOS/testbed/Python.xcframework/ios-arm64_x86_64-simulator +Running ``make test`` generates a standalone version of the ``iOS/testbed`` +project, and runs the full test suite. It does this using ``iOS/testbed`` +itself - the folder is an executable module that can be used to create and run +a clone of the testbed project. -To test on an x86-64 simulator, run:: +You can generate your own standalone testbed instance by running:: - $ rm -rf iOS/testbed/Python.xcframework/ios-arm64_x86_64-simulator/* - $ cp -r iOS/Frameworks/x86_64-iphonesimulator/* iOS/testbed/Python.xcframework/ios-arm64_x86_64-simulator + $ python iOS/testbed clone --framework iOS/Frameworks/arm64-iphonesimulator my-testbed -To test on a physical device:: +This invocation assumes that ``iOS/Frameworks/arm64-iphonesimulator`` is the +path to the iOS simulator framework for your platform (ARM64 in this case); +``my-testbed`` is the name of the folder for the new testbed clone. - $ rm -rf iOS/testbed/Python.xcframework/ios-arm64/* - $ cp -r iOS/Frameworks/arm64-iphoneos/* iOS/testbed/Python.xcframework/ios-arm64 +You can then use the ``my-testbed`` folder to run the Python test suite, +passing in any command line arguments you may require. For example, if you're +trying to diagnose a failure in the ``os`` module, you might run:: -Alternatively, you can configure your build to install directly into the -testbed project. For a simulator, use:: + $ python my-testbed run -- test -W test_os - --enable-framework=$(pwd)/iOS/testbed/Python.xcframework/ios-arm64_x86_64-simulator +This is the equivalent of running ``python -m test -W test_os`` on a desktop +Python build. Any arguments after the ``--`` will be passed to testbed as if +they were arguments to ``python -m`` on a desktop machine. -For a physical device, use:: +You can also open the testbed project in Xcode by running:: - --enable-framework=$(pwd)/iOS/testbed/Python.xcframework/ios-arm64 + $ open my-testbed/iOSTestbed.xcodeproj +This will allow you to use the full Xcode suite of tools for debugging. Testing on an iOS device ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/iOS/testbed/__main__.py b/iOS/testbed/__main__.py new file mode 100644 index 00000000000000..22570ee0f3ed04 --- /dev/null +++ b/iOS/testbed/__main__.py @@ -0,0 +1,365 @@ +import argparse +import asyncio +import json +import plistlib +import shutil +import subprocess +import sys +from contextlib import asynccontextmanager +from datetime import datetime +from pathlib import Path + + +DECODE_ARGS = ("UTF-8", "backslashreplace") + + +# Work around a bug involving sys.exit and TaskGroups +# (https://github.com/python/cpython/issues/101515). +def exit(*args): + raise MySystemExit(*args) + + +class MySystemExit(Exception): + pass + + +# All subprocesses are executed through this context manager so that no matter +# what happens, they can always be cancelled from another task, and they will +# always be cleaned up on exit. +@asynccontextmanager +async def async_process(*args, **kwargs): + process = await asyncio.create_subprocess_exec(*args, **kwargs) + try: + yield process + finally: + if process.returncode is None: + # Allow a reasonably long time for Xcode to clean itself up, + # because we don't want stale emulators left behind. + timeout = 10 + process.terminate() + try: + await asyncio.wait_for(process.wait(), timeout) + except TimeoutError: + print( + f"Command {args} did not terminate after {timeout} seconds " + f" - sending SIGKILL" + ) + process.kill() + + # Even after killing the process we must still wait for it, + # otherwise we'll get the warning "Exception ignored in __del__". + await asyncio.wait_for(process.wait(), timeout=1) + + +async def async_check_output(*args, **kwargs): + async with async_process( + *args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs + ) as process: + stdout, stderr = await process.communicate() + if process.returncode == 0: + return stdout.decode(*DECODE_ARGS) + else: + raise subprocess.CalledProcessError( + process.returncode, + args, + stdout.decode(*DECODE_ARGS), + stderr.decode(*DECODE_ARGS), + ) + + +# Return a list of UDIDs associated with booted simulators +async def list_devices(): + # List the testing simulators, in JSON format + raw_json = await async_check_output( + "xcrun", "simctl", "--set", "testing", "list", "-j" + ) + json_data = json.loads(raw_json) + + # Filter out the booted iOS simulators + return [ + simulator["udid"] + for runtime, simulators in json_data["devices"].items() + for simulator in simulators + if runtime.split(".")[-1].startswith("iOS") and simulator["state"] == "Booted" + ] + + +async def find_device(initial_devices): + while True: + new_devices = set(await list_devices()).difference(initial_devices) + if len(new_devices) == 0: + await asyncio.sleep(1) + elif len(new_devices) == 1: + udid = new_devices.pop() + print(f"{datetime.now():%Y-%m-%d %H:%M:%S}: New test simulator detected") + print(f"UDID: {udid}") + return udid + else: + exit(f"Found more than one new device: {new_devices}") + + +async def log_stream_task(initial_devices): + # Wait up to 5 minutes for the build to complete and the simulator to boot. + udid = await asyncio.wait_for(find_device(initial_devices), 5 * 60) + + # Stream the iOS device's logs, filtering out messages that come from the + # XCTest test suite (catching NSLog messages from the test method), or + # Python itself (catching stdout/stderr content routed to the system log + # with config->use_system_logger). + args = [ + "xcrun", + "simctl", + "--set", + "testing", + "spawn", + udid, + "log", + "stream", + "--style", + "compact", + "--predicate", + ( + 'senderImagePath ENDSWITH "/iOSTestbedTests.xctest/iOSTestbedTests"' + ' OR senderImagePath ENDSWITH "/Python.framework/Python"' + ), + ] + + async with async_process( + *args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) as process: + suppress_dupes = False + while line := (await process.stdout.readline()).decode(*DECODE_ARGS): + # The iOS log streamer can sometimes lag; when it does, it outputs + # a warning about messages being dropped... often multiple times. + # Only print the first of these duplicated warnings. + if line.startswith("=== Messages dropped "): + if not suppress_dupes: + suppress_dupes = True + sys.stdout.write(line) + else: + suppress_dupes = False + sys.stdout.write(line) + + +async def xcode_test(location, simulator): + # Run the test suite on the named simulator + args = [ + "xcodebuild", + "test", + "-project", + str(location / "iOSTestbed.xcodeproj"), + "-scheme", + "iOSTestbed", + "-destination", + f"platform=iOS Simulator,name={simulator}", + "-resultBundlePath", + str(location / f"{datetime.now():%Y%m%d-%H%M%S}.xcresult"), + "-derivedDataPath", + str(location / "DerivedData"), + ] + async with async_process( + *args, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) as process: + while line := (await process.stdout.readline()).decode(*DECODE_ARGS): + sys.stdout.write(line) + + status = await asyncio.wait_for(process.wait(), timeout=1) + exit(status) + + +def clone_testbed( + source: Path, + target: Path, + framework: Path, + apps: list[Path], +) -> None: + if target.exists(): + print(f"{target} already exists; aborting without creating project.") + sys.exit(10) + + if framework is None: + if not (source / "Python.xcframework/ios-arm64_x86_64-simulator/bin").is_dir(): + print( + f"The testbed being cloned ({source}) does not contain " + f"a simulator framework. Re-run with --framework" + ) + sys.exit(11) + else: + if not framework.is_dir(): + print(f"{framework} does not exist.") + sys.exit(12) + elif not ( + framework.suffix == ".xcframework" + or (framework / "Python.framework").is_dir() + ): + print( + f"{framework} is not an XCframework, " + f"or a simulator slice of a framework build." + ) + sys.exit(13) + + print("Cloning testbed project...") + shutil.copytree(source, target) + + if framework is not None: + if framework.suffix == ".xcframework": + print("Installing XCFramework...") + xc_framework_path = target / "Python.xcframework" + shutil.rmtree(xc_framework_path) + shutil.copytree(framework, xc_framework_path) + else: + print("Installing simulator Framework...") + sim_framework_path = ( + target / "Python.xcframework" / "ios-arm64_x86_64-simulator" + ) + shutil.rmtree(sim_framework_path) + shutil.copytree(framework, sim_framework_path) + else: + print("Using pre-existing iOS framework.") + + for app_src in apps: + print(f"Installing app {app_src.name!r}...") + app_target = target / f"iOSTestbed/app/{app_src.name}" + if app_target.is_dir(): + shutil.rmtree(app_target) + shutil.copytree(app_src, app_target) + + print(f"Testbed project created in {target}") + + +def update_plist(testbed_path, args): + # Add the test runner arguments to the testbed's Info.plist file. + info_plist = testbed_path / "iOSTestbed" / "iOSTestbed-Info.plist" + with info_plist.open("rb") as f: + info = plistlib.load(f) + + info["TestArgs"] = args + + with info_plist.open("wb") as f: + plistlib.dump(info, f) + + +async def run_testbed(simulator: str, args: list[str]): + location = Path(__file__).parent + print("Updating plist...") + update_plist(location, args) + + # Get the list of devices that are booted at the start of the test run. + # The simulator started by the test suite will be detected as the new + # entry that appears on the device list. + initial_devices = await list_devices() + + try: + async with asyncio.TaskGroup() as tg: + tg.create_task(log_stream_task(initial_devices)) + tg.create_task(xcode_test(location, simulator)) + except* MySystemExit as e: + raise SystemExit(*e.exceptions[0].args) from None + except* subprocess.CalledProcessError as e: + # Extract it from the ExceptionGroup so it can be handled by `main`. + raise e.exceptions[0] + + +def main(): + parser = argparse.ArgumentParser( + description=( + "Manages the process of testing a Python project in the iOS simulator." + ), + ) + + subcommands = parser.add_subparsers(dest="subcommand") + + clone = subcommands.add_parser( + "clone", + description=( + "Clone the testbed project, copying in an iOS Python framework and" + "any specified application code." + ), + help="Clone a testbed project to a new location.", + ) + clone.add_argument( + "--framework", + help=( + "The location of the XCFramework (or simulator-only slice of an " + "XCFramework) to use when running the testbed" + ), + ) + clone.add_argument( + "--app", + dest="apps", + action="append", + default=[], + help="The location of any code to include in the testbed project", + ) + clone.add_argument( + "location", + help="The path where the testbed will be cloned.", + ) + + run = subcommands.add_parser( + "run", + usage="%(prog)s [-h] [--simulator SIMULATOR] -- [ ...]", + description=( + "Run a testbed project. The arguments provided after `--` will be " + "passed to the running iOS process as if they were arguments to " + "`python -m`." + ), + help="Run a testbed project", + ) + run.add_argument( + "--simulator", + default="iPhone SE (3rd Generation)", + help="The name of the simulator to use (default: 'iPhone SE (3rd Generation)')", + ) + + try: + pos = sys.argv.index("--") + testbed_args = sys.argv[1:pos] + test_args = sys.argv[pos + 1 :] + except ValueError: + testbed_args = sys.argv[1:] + test_args = [] + + context = parser.parse_args(testbed_args) + + if context.subcommand == "clone": + clone_testbed( + source=Path(__file__).parent, + target=Path(context.location), + framework=Path(context.framework) if context.framework else None, + apps=[Path(app) for app in context.apps], + ) + elif context.subcommand == "run": + if test_args: + if not ( + Path(__file__).parent / "Python.xcframework/ios-arm64_x86_64-simulator/bin" + ).is_dir(): + print( + f"Testbed does not contain a compiled iOS framework. Use " + f"`python {sys.argv[0]} clone ...` to create a runnable " + f"clone of this testbed." + ) + sys.exit(20) + + asyncio.run( + run_testbed( + simulator=context.simulator, + args=test_args, + ) + ) + else: + print(f"Must specify test arguments (e.g., {sys.argv[0]} run -- test)") + print() + parser.print_help(sys.stderr) + sys.exit(21) + else: + parser.print_help(sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/iOS/testbed/iOSTestbed.xcodeproj/project.pbxproj b/iOS/testbed/iOSTestbed.xcodeproj/project.pbxproj index 6819ac0eeed95f..c7d63909ee2453 100644 --- a/iOS/testbed/iOSTestbed.xcodeproj/project.pbxproj +++ b/iOS/testbed/iOSTestbed.xcodeproj/project.pbxproj @@ -263,6 +263,7 @@ runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; shellScript = "set -e\n\nmkdir -p \"$CODESIGNING_FOLDER_PATH/python/lib\"\nif [ \"$EFFECTIVE_PLATFORM_NAME\" = \"-iphonesimulator\" ]; then\n echo \"Installing Python modules for iOS Simulator\"\n rsync -au --delete \"$PROJECT_DIR/Python.xcframework/ios-arm64_x86_64-simulator/lib/\" \"$CODESIGNING_FOLDER_PATH/python/lib/\" \nelse\n echo \"Installing Python modules for iOS Device\"\n rsync -au --delete \"$PROJECT_DIR/Python.xcframework/ios-arm64/lib/\" \"$CODESIGNING_FOLDER_PATH/python/lib/\" \nfi\n"; + showEnvVarsInLog = 0; }; 607A66562B0F06200010BFC8 /* Prepare Python Binary Modules */ = { isa = PBXShellScriptBuildPhase; @@ -282,6 +283,7 @@ runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; shellScript = "set -e\n\ninstall_dylib () {\n INSTALL_BASE=$1\n FULL_EXT=$2\n\n # The name of the extension file\n EXT=$(basename \"$FULL_EXT\")\n # The location of the extension file, relative to the bundle\n RELATIVE_EXT=${FULL_EXT#$CODESIGNING_FOLDER_PATH/} \n # The path to the extension file, relative to the install base\n PYTHON_EXT=${RELATIVE_EXT/$INSTALL_BASE/}\n # The full dotted name of the extension module, constructed from the file path.\n FULL_MODULE_NAME=$(echo $PYTHON_EXT | cut -d \".\" -f 1 | tr \"/\" \".\"); \n # A bundle identifier; not actually used, but required by Xcode framework packaging\n FRAMEWORK_BUNDLE_ID=$(echo $PRODUCT_BUNDLE_IDENTIFIER.$FULL_MODULE_NAME | tr \"_\" \"-\")\n # The name of the framework folder.\n FRAMEWORK_FOLDER=\"Frameworks/$FULL_MODULE_NAME.framework\"\n\n # If the framework folder doesn't exist, create it.\n if [ ! -d \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER\" ]; then\n echo \"Creating framework for $RELATIVE_EXT\" \n mkdir -p \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER\"\n cp \"$CODESIGNING_FOLDER_PATH/dylib-Info-template.plist\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist\"\n plutil -replace CFBundleExecutable -string \"$FULL_MODULE_NAME\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist\"\n plutil -replace CFBundleIdentifier -string \"$FRAMEWORK_BUNDLE_ID\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/Info.plist\"\n fi\n \n echo \"Installing binary for $FRAMEWORK_FOLDER/$FULL_MODULE_NAME\" \n mv \"$FULL_EXT\" \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME\"\n # Create a placeholder .fwork file where the .so was\n echo \"$FRAMEWORK_FOLDER/$FULL_MODULE_NAME\" > ${FULL_EXT%.so}.fwork\n # Create a back reference to the .so file location in the framework\n echo \"${RELATIVE_EXT%.so}.fwork\" > \"$CODESIGNING_FOLDER_PATH/$FRAMEWORK_FOLDER/$FULL_MODULE_NAME.origin\" \n}\n\nPYTHON_VER=$(ls -1 \"$CODESIGNING_FOLDER_PATH/python/lib\")\necho \"Install Python $PYTHON_VER standard library extension modules...\"\nfind \"$CODESIGNING_FOLDER_PATH/python/lib/$PYTHON_VER/lib-dynload\" -name \"*.so\" | while read FULL_EXT; do\n install_dylib python/lib/$PYTHON_VER/lib-dynload/ \"$FULL_EXT\"\ndone\necho \"Install app package extension modules...\"\nfind \"$CODESIGNING_FOLDER_PATH/app_packages\" -name \"*.so\" | while read FULL_EXT; do\n install_dylib app_packages/ \"$FULL_EXT\"\ndone\necho \"Install app extension modules...\"\nfind \"$CODESIGNING_FOLDER_PATH/app\" -name \"*.so\" | while read FULL_EXT; do\n install_dylib app/ \"$FULL_EXT\"\ndone\n\n# Clean up dylib template \nrm -f \"$CODESIGNING_FOLDER_PATH/dylib-Info-template.plist\"\necho \"Signing frameworks as $EXPANDED_CODE_SIGN_IDENTITY_NAME ($EXPANDED_CODE_SIGN_IDENTITY)...\"\nfind \"$CODESIGNING_FOLDER_PATH/Frameworks\" -name \"*.framework\" -exec /usr/bin/codesign --force --sign \"$EXPANDED_CODE_SIGN_IDENTITY\" ${OTHER_CODE_SIGN_FLAGS:-} -o runtime --timestamp=none --preserve-metadata=identifier,entitlements,flags --generate-entitlement-der \"{}\" \\; \n"; + showEnvVarsInLog = 0; }; /* End PBXShellScriptBuildPhase section */ diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m index db00d43da85cbc..ac78456a61e65e 100644 --- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m +++ b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m @@ -50,6 +50,8 @@ - (void)testPython { // Enforce UTF-8 encoding for stderr, stdout, file-system encoding and locale. // See https://docs.python.org/3/library/os.html#python-utf-8-mode. preconfig.utf8_mode = 1; + // Use the system logger for stdout/err + config.use_system_logger = 1; // Don't buffer stdio. We want output to appears in the log immediately config.buffered_stdio = 0; // Don't write bytecode; we can't modify the app bundle From 5eb7fd4d0fc37b91058086181afebec41e66e5ad Mon Sep 17 00:00:00 2001 From: Wulian <1055917385@qq.com> Date: Mon, 9 Dec 2024 20:24:26 +0800 Subject: [PATCH 27/55] gh-127732: Add Windows Server 2025 detection to platform module (GH-127733) --- Lib/platform.py | 3 ++- .../Library/2024-12-08-08-36-18.gh-issue-127732.UEKxoa.rst | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-08-08-36-18.gh-issue-127732.UEKxoa.rst diff --git a/Lib/platform.py b/Lib/platform.py index 239e660cd1621d..1f6baed66d3df9 100644 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -353,7 +353,8 @@ def _wmi_query(table, *keys): ] _WIN32_SERVER_RELEASES = [ - ((10, 1, 0), "post2022Server"), + ((10, 1, 0), "post2025Server"), + ((10, 0, 26100), "2025Server"), ((10, 0, 20348), "2022Server"), ((10, 0, 17763), "2019Server"), ((6, 4, 0), "2016Server"), diff --git a/Misc/NEWS.d/next/Library/2024-12-08-08-36-18.gh-issue-127732.UEKxoa.rst b/Misc/NEWS.d/next/Library/2024-12-08-08-36-18.gh-issue-127732.UEKxoa.rst new file mode 100644 index 00000000000000..44821300f6e4e6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-08-08-36-18.gh-issue-127732.UEKxoa.rst @@ -0,0 +1 @@ +The :mod:`platform` module now correctly detects Windows Server 2025. From e85f2f1703e0f79cfd0d0e3010190b71c0eb18da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 9 Dec 2024 19:02:22 +0100 Subject: [PATCH 28/55] gh-127637: add tests for `dis` command-line interface (#127759) --- Lib/dis.py | 4 +- Lib/test/test_dis.py | 122 +++++++++++++++++- ...-12-09-12-35-44.gh-issue-127637.KLx-9I.rst | 1 + 3 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-12-09-12-35-44.gh-issue-127637.KLx-9I.rst diff --git a/Lib/dis.py b/Lib/dis.py index 6b3e9ef8399e1c..aa22404c6687e1 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -1115,7 +1115,7 @@ def dis(self): return output.getvalue() -def main(): +def main(args=None): import argparse parser = argparse.ArgumentParser() @@ -1128,7 +1128,7 @@ def main(): parser.add_argument('-S', '--specialized', action='store_true', help='show specialized bytecode') parser.add_argument('infile', nargs='?', default='-') - args = parser.parse_args() + args = parser.parse_args(args=args) if args.infile == '-': name = '' source = sys.stdin.buffer.read() diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 55890e58ed4bae..c719f571152d61 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -5,15 +5,19 @@ import dis import functools import io +import itertools +import opcode import re import sys +import tempfile +import textwrap import types import unittest from test.support import (captured_stdout, requires_debug_ranges, - requires_specialization, cpython_only) + requires_specialization, cpython_only, + os_helper) from test.support.bytecode_helper import BytecodeTestCase -import opcode CACHE = dis.opmap["CACHE"] @@ -2426,5 +2430,119 @@ def _unroll_caches_as_Instructions(instrs, show_caches=False): False, None, None, instr.positions) +class TestDisCLI(unittest.TestCase): + + def setUp(self): + self.filename = tempfile.mktemp() + self.addCleanup(os_helper.unlink, self.filename) + + @staticmethod + def text_normalize(string): + """Dedent *string* and strip it from its surrounding whitespaces. + + This method is used by the other utility functions so that any + string to write or to match against can be freely indented. + """ + return textwrap.dedent(string).strip() + + def set_source(self, content): + with open(self.filename, 'w') as fp: + fp.write(self.text_normalize(content)) + + def invoke_dis(self, *flags): + output = io.StringIO() + with contextlib.redirect_stdout(output): + dis.main(args=[*flags, self.filename]) + return self.text_normalize(output.getvalue()) + + def check_output(self, source, expect, *flags): + with self.subTest(source=source, flags=flags): + self.set_source(source) + res = self.invoke_dis(*flags) + expect = self.text_normalize(expect) + self.assertListEqual(res.splitlines(), expect.splitlines()) + + def test_invocation(self): + # test various combinations of parameters + base_flags = [ + ('-C', '--show-caches'), + ('-O', '--show-offsets'), + ('-P', '--show-positions'), + ('-S', '--specialized'), + ] + + self.set_source(''' + def f(): + print(x) + return None + ''') + + for r in range(1, len(base_flags) + 1): + for choices in itertools.combinations(base_flags, r=r): + for args in itertools.product(*choices): + with self.subTest(args=args[1:]): + _ = self.invoke_dis(*args) + + with self.assertRaises(SystemExit): + # suppress argparse error message + with contextlib.redirect_stderr(io.StringIO()): + _ = self.invoke_dis('--unknown') + + def test_show_cache(self): + # test 'python -m dis -C/--show-caches' + source = 'print()' + expect = ''' + 0 RESUME 0 + + 1 LOAD_NAME 0 (print) + PUSH_NULL + CALL 0 + CACHE 0 (counter: 0) + CACHE 0 (func_version: 0) + CACHE 0 + POP_TOP + LOAD_CONST 0 (None) + RETURN_VALUE + ''' + for flag in ['-C', '--show-caches']: + self.check_output(source, expect, flag) + + def test_show_offsets(self): + # test 'python -m dis -O/--show-offsets' + source = 'pass' + expect = ''' + 0 0 RESUME 0 + + 1 2 LOAD_CONST 0 (None) + 4 RETURN_VALUE + ''' + for flag in ['-O', '--show-offsets']: + self.check_output(source, expect, flag) + + def test_show_positions(self): + # test 'python -m dis -P/--show-positions' + source = 'pass' + expect = ''' + 0:0-1:0 RESUME 0 + + 1:0-1:4 LOAD_CONST 0 (None) + 1:0-1:4 RETURN_VALUE + ''' + for flag in ['-P', '--show-positions']: + self.check_output(source, expect, flag) + + def test_specialized_code(self): + # test 'python -m dis -S/--specialized' + source = 'pass' + expect = ''' + 0 RESUME 0 + + 1 LOAD_CONST_IMMORTAL 0 (None) + RETURN_VALUE + ''' + for flag in ['-S', '--specialized']: + self.check_output(source, expect, flag) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Tests/2024-12-09-12-35-44.gh-issue-127637.KLx-9I.rst b/Misc/NEWS.d/next/Tests/2024-12-09-12-35-44.gh-issue-127637.KLx-9I.rst new file mode 100644 index 00000000000000..ac5d9827b07199 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-12-09-12-35-44.gh-issue-127637.KLx-9I.rst @@ -0,0 +1 @@ +Add tests for the :mod:`dis` command-line interface. Patch by Bénédikt Tran. From 5c89adf385aaaca97c2ee9074f8b1fda0f57ad26 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 9 Dec 2024 18:31:22 +0000 Subject: [PATCH 29/55] GH-127456: pathlib ABCs: add protocol for path parser (#127494) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the default value of `PurePathBase.parser` from `ParserBase()` to `posixpath`. As a result, user subclasses of `PurePathBase` and `PathBase` use POSIX path syntax by default, which is very often desirable. Move `pathlib._abc.ParserBase` to `pathlib._types.Parser`, and convert it to a runtime-checkable protocol. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/pathlib/_abc.py | 56 +-------------------- Lib/pathlib/_types.py | 22 ++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 61 +++-------------------- 3 files changed, 32 insertions(+), 107 deletions(-) create mode 100644 Lib/pathlib/_types.py diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 309eab2ff855c3..f68685f21d6d79 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -13,6 +13,7 @@ import functools import operator +import posixpath from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -33,59 +34,6 @@ def _is_case_sensitive(parser): return parser.normcase('Aa') == 'Aa' - -class ParserBase: - """Base class for path parsers, which do low-level path manipulation. - - Path parsers provide a subset of the os.path API, specifically those - functions needed to provide PurePathBase functionality. Each PurePathBase - subclass references its path parser via a 'parser' class attribute. - - Every method in this base class raises an UnsupportedOperation exception. - """ - - @classmethod - def _unsupported_msg(cls, attribute): - return f"{cls.__name__}.{attribute} is unsupported" - - @property - def sep(self): - """The character used to separate path components.""" - raise UnsupportedOperation(self._unsupported_msg('sep')) - - def join(self, path, *paths): - """Join path segments.""" - raise UnsupportedOperation(self._unsupported_msg('join()')) - - def split(self, path): - """Split the path into a pair (head, tail), where *head* is everything - before the final path separator, and *tail* is everything after. - Either part may be empty. - """ - raise UnsupportedOperation(self._unsupported_msg('split()')) - - def splitdrive(self, path): - """Split the path into a 2-item tuple (drive, tail), where *drive* is - a device name or mount point, and *tail* is everything after the - drive. Either part may be empty.""" - raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) - - def splitext(self, path): - """Split the path into a pair (root, ext), where *ext* is empty or - begins with a period and contains at most one period, - and *root* is everything before the extension.""" - raise UnsupportedOperation(self._unsupported_msg('splitext()')) - - def normcase(self, path): - """Normalize the case of the path.""" - raise UnsupportedOperation(self._unsupported_msg('normcase()')) - - def isabs(self, path): - """Returns whether the path is absolute, i.e. unaffected by the - current directory or drive.""" - raise UnsupportedOperation(self._unsupported_msg('isabs()')) - - class PathGlobber(_GlobberBase): """ Class providing shell-style globbing for path objects. @@ -115,7 +63,7 @@ class PurePathBase: # the `__init__()` method. '_raw_paths', ) - parser = ParserBase() + parser = posixpath _globber = PathGlobber def __init__(self, *args): diff --git a/Lib/pathlib/_types.py b/Lib/pathlib/_types.py new file mode 100644 index 00000000000000..60df94d0b46049 --- /dev/null +++ b/Lib/pathlib/_types.py @@ -0,0 +1,22 @@ +""" +Protocols for supporting classes in pathlib. +""" +from typing import Protocol, runtime_checkable + + +@runtime_checkable +class Parser(Protocol): + """Protocol for path parsers, which do low-level path manipulation. + + Path parsers provide a subset of the os.path API, specifically those + functions needed to provide PurePathBase functionality. Each PurePathBase + subclass references its path parser via a 'parser' class attribute. + """ + + sep: str + def join(self, path: str, *paths: str) -> str: ... + def split(self, path: str) -> tuple[str, str]: ... + def splitdrive(self, path: str) -> tuple[str, str]: ... + def splitext(self, path: str) -> tuple[str, str]: ... + def normcase(self, path: str) -> str: ... + def isabs(self, path: str) -> bool: ... diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 675abf30a9f13c..dd9425ce393623 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -5,7 +5,8 @@ import stat import unittest -from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase +from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._types import Parser import posixpath from test.support.os_helper import TESTFN @@ -31,22 +32,6 @@ def test_is_notimplemented(self): self.assertTrue(issubclass(UnsupportedOperation, NotImplementedError)) self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError)) - -class ParserBaseTest(unittest.TestCase): - cls = ParserBase - - def test_unsupported_operation(self): - m = self.cls() - e = UnsupportedOperation - with self.assertRaises(e): - m.sep - self.assertRaises(e, m.join, 'foo') - self.assertRaises(e, m.split, 'foo') - self.assertRaises(e, m.splitdrive, 'foo') - self.assertRaises(e, m.splitext, 'foo') - self.assertRaises(e, m.normcase, 'foo') - self.assertRaises(e, m.isabs, 'foo') - # # Tests for the pure classes. # @@ -55,37 +40,6 @@ def test_unsupported_operation(self): class PurePathBaseTest(unittest.TestCase): cls = PurePathBase - def test_unsupported_operation_pure(self): - p = self.cls('foo') - e = UnsupportedOperation - with self.assertRaises(e): - p.drive - with self.assertRaises(e): - p.root - with self.assertRaises(e): - p.anchor - with self.assertRaises(e): - p.parts - with self.assertRaises(e): - p.parent - with self.assertRaises(e): - p.parents - with self.assertRaises(e): - p.name - with self.assertRaises(e): - p.stem - with self.assertRaises(e): - p.suffix - with self.assertRaises(e): - p.suffixes - self.assertRaises(e, p.with_name, 'bar') - self.assertRaises(e, p.with_stem, 'bar') - self.assertRaises(e, p.with_suffix, '.txt') - self.assertRaises(e, p.relative_to, '') - self.assertRaises(e, p.is_relative_to, '') - self.assertRaises(e, p.is_absolute) - self.assertRaises(e, p.match, '*') - def test_magic_methods(self): P = self.cls self.assertFalse(hasattr(P, '__fspath__')) @@ -100,12 +54,11 @@ def test_magic_methods(self): self.assertIs(P.__ge__, object.__ge__) def test_parser(self): - self.assertIsInstance(self.cls.parser, ParserBase) + self.assertIs(self.cls.parser, posixpath) class DummyPurePath(PurePathBase): __slots__ = () - parser = posixpath def __eq__(self, other): if not isinstance(other, DummyPurePath): @@ -136,6 +89,9 @@ def setUp(self): self.sep = self.parser.sep self.altsep = self.parser.altsep + def test_parser(self): + self.assertIsInstance(self.cls.parser, Parser) + def test_constructor_common(self): P = self.cls p = P('a') @@ -1359,8 +1315,8 @@ def test_unsupported_operation(self): self.assertRaises(e, p.write_bytes, b'foo') self.assertRaises(e, p.write_text, 'foo') self.assertRaises(e, p.iterdir) - self.assertRaises(e, p.glob, '*') - self.assertRaises(e, p.rglob, '*') + self.assertRaises(e, lambda: list(p.glob('*'))) + self.assertRaises(e, lambda: list(p.rglob('*'))) self.assertRaises(e, lambda: list(p.walk())) self.assertRaises(e, p.expanduser) self.assertRaises(e, p.readlink) @@ -1411,7 +1367,6 @@ class DummyPath(PathBase): memory. """ __slots__ = () - parser = posixpath _files = {} _directories = {} From 3b18af964da9814474a5db9e502962c7e0593e8d Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 10 Dec 2024 03:32:58 +0100 Subject: [PATCH 30/55] gh-127629: Add ctypes to the Emscripten build (#127683) Adds tooling to build libffi and add ctypes to the stdlib for Emscripten. --- ...-12-06-12-47-52.gh-issue-127629.tD-ERQ.rst | 1 + Tools/wasm/README.md | 1 + Tools/wasm/emscripten/__main__.py | 64 +++++++++++++++---- Tools/wasm/emscripten/make_libffi.sh | 21 ++++++ 4 files changed, 76 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-12-06-12-47-52.gh-issue-127629.tD-ERQ.rst create mode 100755 Tools/wasm/emscripten/make_libffi.sh diff --git a/Misc/NEWS.d/next/Build/2024-12-06-12-47-52.gh-issue-127629.tD-ERQ.rst b/Misc/NEWS.d/next/Build/2024-12-06-12-47-52.gh-issue-127629.tD-ERQ.rst new file mode 100644 index 00000000000000..52ee84f5d6f6c3 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-12-06-12-47-52.gh-issue-127629.tD-ERQ.rst @@ -0,0 +1 @@ +Emscripten builds now include ctypes support. diff --git a/Tools/wasm/README.md b/Tools/wasm/README.md index 4802d9683de52e..2e0fa2fb533d67 100644 --- a/Tools/wasm/README.md +++ b/Tools/wasm/README.md @@ -61,6 +61,7 @@ or you can break it out into four separate steps: ```shell python Tools/wasm/emscripten configure-build-python python Tools/wasm/emscripten make-build-python +python Tools/wasm/emscripten make-libffi python Tools/wasm/emscripten configure-host python Tools/wasm/emscripten make-host ``` diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index 6843b6fdeceb8c..4a53e0bd1bee1b 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -9,6 +9,7 @@ import sys import sysconfig import tempfile +from urllib.request import urlopen from pathlib import Path from textwrap import dedent @@ -22,9 +23,13 @@ CHECKOUT = EMSCRIPTEN_DIR.parent.parent.parent CROSS_BUILD_DIR = CHECKOUT / "cross-build" -BUILD_DIR = CROSS_BUILD_DIR / "build" +NATIVE_BUILD_DIR = CROSS_BUILD_DIR / "build" HOST_TRIPLE = "wasm32-emscripten" -HOST_DIR = CROSS_BUILD_DIR / HOST_TRIPLE + +DOWNLOAD_DIR = CROSS_BUILD_DIR / HOST_TRIPLE / "build" +HOST_BUILD_DIR = CROSS_BUILD_DIR / HOST_TRIPLE / "build" +HOST_DIR = HOST_BUILD_DIR / "python" +PREFIX_DIR = CROSS_BUILD_DIR / HOST_TRIPLE / "prefix" LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local" LOCAL_SETUP_MARKER = "# Generated by Tools/wasm/emscripten.py\n".encode("utf-8") @@ -118,16 +123,16 @@ def build_platform(): def build_python_path(): """The path to the build Python binary.""" - binary = BUILD_DIR / "python" + binary = NATIVE_BUILD_DIR / "python" if not binary.is_file(): binary = binary.with_suffix(".exe") if not binary.is_file(): - raise FileNotFoundError("Unable to find `python(.exe)` in " f"{BUILD_DIR}") + raise FileNotFoundError("Unable to find `python(.exe)` in " f"{NATIVE_BUILD_DIR}") return binary -@subdir(BUILD_DIR, clean_ok=True) +@subdir(NATIVE_BUILD_DIR, clean_ok=True) def configure_build_python(context, working_dir): """Configure the build/host Python.""" if LOCAL_SETUP.exists(): @@ -143,7 +148,7 @@ def configure_build_python(context, working_dir): call(configure, quiet=context.quiet) -@subdir(BUILD_DIR) +@subdir(NATIVE_BUILD_DIR) def make_build_python(context, working_dir): """Make/build the build Python.""" call(["make", "--jobs", str(cpu_count()), "all"], quiet=context.quiet) @@ -159,6 +164,23 @@ def make_build_python(context, working_dir): print(f"🎉 {binary} {version}") +@subdir(HOST_BUILD_DIR, clean_ok=True) +def make_emscripten_libffi(context, working_dir): + shutil.rmtree(working_dir / "libffi-3.4.6", ignore_errors=True) + with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp_file: + with urlopen( + "https://github.com/libffi/libffi/releases/download/v3.4.6/libffi-3.4.6.tar.gz" + ) as response: + shutil.copyfileobj(response, tmp_file) + shutil.unpack_archive(tmp_file.name, working_dir) + call( + [EMSCRIPTEN_DIR / "make_libffi.sh"], + env=updated_env({"PREFIX": PREFIX_DIR}), + cwd=working_dir / "libffi-3.4.6", + quiet=context.quiet, + ) + + @subdir(HOST_DIR, clean_ok=True) def configure_emscripten_python(context, working_dir): """Configure the emscripten/host build.""" @@ -168,7 +190,7 @@ def configure_emscripten_python(context, working_dir): emscripten_build_dir = working_dir.relative_to(CHECKOUT) - python_build_dir = BUILD_DIR / "build" + python_build_dir = NATIVE_BUILD_DIR / "build" lib_dirs = list(python_build_dir.glob("lib.*")) assert ( len(lib_dirs) == 1 @@ -183,12 +205,18 @@ def configure_emscripten_python(context, working_dir): sysconfig_data += "-pydebug" host_runner = context.host_runner - env_additions = {"CONFIG_SITE": config_site, "HOSTRUNNER": host_runner} + pkg_config_path_dir = (PREFIX_DIR / "lib/pkgconfig/").resolve() + env_additions = { + "CONFIG_SITE": config_site, + "HOSTRUNNER": host_runner, + "EM_PKG_CONFIG_PATH": str(pkg_config_path_dir), + } build_python = os.fsdecode(build_python_path()) configure = [ "emconfigure", os.path.relpath(CHECKOUT / "configure", working_dir), "CFLAGS=-DPY_CALL_TRAMPOLINE -sUSE_BZIP2", + "PKG_CONFIG=pkg-config", f"--host={HOST_TRIPLE}", f"--build={build_platform()}", f"--with-build-python={build_python}", @@ -197,7 +225,7 @@ def configure_emscripten_python(context, working_dir): "--disable-ipv6", "--enable-big-digits=30", "--enable-wasm-dynamic-linking", - f"--prefix={HOST_DIR}", + f"--prefix={PREFIX_DIR}", ] if pydebug: configure.append("--with-pydebug") @@ -264,6 +292,7 @@ def build_all(context): steps = [ configure_build_python, make_build_python, + make_emscripten_libffi, configure_emscripten_python, make_emscripten_python, ] @@ -292,6 +321,9 @@ def main(): configure_build = subcommands.add_parser( "configure-build-python", help="Run `configure` for the " "build Python" ) + make_libffi_cmd = subcommands.add_parser( + "make-libffi", help="Clone libffi repo, configure and build it for emscripten" + ) make_build = subcommands.add_parser( "make-build-python", help="Run `make` for the build Python" ) @@ -299,11 +331,20 @@ def main(): "configure-host", help="Run `configure` for the host/emscripten (pydebug builds are inferred from the build Python)", ) - make_host = subcommands.add_parser("make-host", help="Run `make` for the host/emscripten") + make_host = subcommands.add_parser( + "make-host", help="Run `make` for the host/emscripten" + ) clean = subcommands.add_parser( "clean", help="Delete files and directories created by this script" ) - for subcommand in build, configure_build, make_build, configure_host, make_host: + for subcommand in ( + build, + configure_build, + make_libffi_cmd, + make_build, + configure_host, + make_host, + ): subcommand.add_argument( "--quiet", action="store_true", @@ -336,6 +377,7 @@ def main(): context = parser.parse_args() dispatch = { + "make-libffi": make_emscripten_libffi, "configure-build-python": configure_build_python, "make-build-python": make_build_python, "configure-host": configure_emscripten_python, diff --git a/Tools/wasm/emscripten/make_libffi.sh b/Tools/wasm/emscripten/make_libffi.sh new file mode 100755 index 00000000000000..3c75c4d5127102 --- /dev/null +++ b/Tools/wasm/emscripten/make_libffi.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set +e + +export CFLAGS="-O2 -fPIC -DWASM_BIGINT" +export CXXFLAGS="$CFLAGS" + +# Build paths +export CPATH="$PREFIX/include" +export PKG_CONFIG_PATH="$PREFIX/lib/pkgconfig" +export EM_PKG_CONFIG_PATH="$PKG_CONFIG_PATH" + +# Specific variables for cross-compilation +export CHOST="wasm32-unknown-linux" # wasm32-unknown-emscripten + +emconfigure ./configure --host=$CHOST --prefix="$PREFIX" --enable-static --disable-shared --disable-dependency-tracking \ + --disable-builddir --disable-multi-os-directory --disable-raw-api --disable-docs + +make install +# Some forgotten headers? +cp fficonfig.h $PREFIX/include/ +cp include/ffi_common.h $PREFIX/include/ From 58c753827ac7aa3d7f1495ac206c28bf2f6c67e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 03:48:38 +0100 Subject: [PATCH 31/55] gh-125420: implement `Sequence.index` API on `memoryview` objects (#125446) --- Doc/library/stdtypes.rst | 9 ++ Lib/test/test_memoryview.py | 26 ++++++ ...-10-14-13-28-16.gh-issue-125420.hNKixM.rst | 2 + Objects/clinic/memoryobject.c.h | 48 +++++++++- Objects/memoryobject.c | 87 +++++++++++++++++++ 5 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-13-28-16.gh-issue-125420.hNKixM.rst diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 4f4fc9fba63120..f4e635dd61e3dc 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4149,6 +4149,15 @@ copying. .. versionchanged:: 3.5 The source format is no longer restricted when casting to a byte view. + .. method:: index(value, start=0, stop=sys.maxsize, /) + + Return the index of the first occurrence of *value* (at or after + index *start* and before index *stop*). + + Raises a :exc:`ValueError` if *value* cannot be found. + + .. versionadded:: next + There are also several readonly attributes available: .. attribute:: obj diff --git a/Lib/test/test_memoryview.py b/Lib/test/test_memoryview.py index 2d4bf5f1408df8..8f07828c19790d 100644 --- a/Lib/test/test_memoryview.py +++ b/Lib/test/test_memoryview.py @@ -15,6 +15,7 @@ import pickle import struct +from itertools import product from test.support import import_helper @@ -58,6 +59,31 @@ def test_getitem(self): for tp in self._types: self.check_getitem_with_type(tp) + def test_index(self): + for tp in self._types: + b = tp(self._source) + m = self._view(b) # may be a sub-view + l = m.tolist() + k = 2 * len(self._source) + + for chi in self._source: + if chi in l: + self.assertEqual(m.index(chi), l.index(chi)) + else: + self.assertRaises(ValueError, m.index, chi) + + for start, stop in product(range(-k, k), range(-k, k)): + index = -1 + try: + index = l.index(chi, start, stop) + except ValueError: + pass + + if index == -1: + self.assertRaises(ValueError, m.index, chi, start, stop) + else: + self.assertEqual(m.index(chi, start, stop), index) + def test_iter(self): for tp in self._types: b = tp(self._source) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-13-28-16.gh-issue-125420.hNKixM.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-13-28-16.gh-issue-125420.hNKixM.rst new file mode 100644 index 00000000000000..6ed823175f6754 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-13-28-16.gh-issue-125420.hNKixM.rst @@ -0,0 +1,2 @@ +Add :meth:`memoryview.index` to :class:`memoryview` objects. Patch by +Bénédikt Tran. diff --git a/Objects/clinic/memoryobject.c.h b/Objects/clinic/memoryobject.c.h index 185d7819b6b84a..9b8a23317f0da0 100644 --- a/Objects/clinic/memoryobject.c.h +++ b/Objects/clinic/memoryobject.c.h @@ -418,4 +418,50 @@ memoryview_hex(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nargs exit: return return_value; } -/*[clinic end generated code: output=0a93f08110630633 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(memoryview_index__doc__, +"index($self, value, start=0, stop=sys.maxsize, /)\n" +"--\n" +"\n" +"Return the index of the first occurrence of a value.\n" +"\n" +"Raises ValueError if the value is not present."); + +#define MEMORYVIEW_INDEX_METHODDEF \ + {"index", _PyCFunction_CAST(memoryview_index), METH_FASTCALL, memoryview_index__doc__}, + +static PyObject * +memoryview_index_impl(PyMemoryViewObject *self, PyObject *value, + Py_ssize_t start, Py_ssize_t stop); + +static PyObject * +memoryview_index(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *value; + Py_ssize_t start = 0; + Py_ssize_t stop = PY_SSIZE_T_MAX; + + if (!_PyArg_CheckPositional("index", nargs, 1, 3)) { + goto exit; + } + value = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndexNotNone(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndexNotNone(args[2], &stop)) { + goto exit; + } +skip_optional: + return_value = memoryview_index_impl(self, value, start, stop); + +exit: + return return_value; +} +/*[clinic end generated code: output=2742d371dba7314f input=a9049054013a1b77]*/ diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 25634f997ac66b..345aa79c328125 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -2748,6 +2748,92 @@ static PySequenceMethods memory_as_sequence = { }; +/**************************************************************************/ +/* Lookup */ +/**************************************************************************/ + +/*[clinic input] +memoryview.index + + value: object + start: slice_index(accept={int}) = 0 + stop: slice_index(accept={int}, c_default="PY_SSIZE_T_MAX") = sys.maxsize + / + +Return the index of the first occurrence of a value. + +Raises ValueError if the value is not present. +[clinic start generated code]*/ + +static PyObject * +memoryview_index_impl(PyMemoryViewObject *self, PyObject *value, + Py_ssize_t start, Py_ssize_t stop) +/*[clinic end generated code: output=e0185e3819e549df input=0697a0165bf90b5a]*/ +{ + const Py_buffer *view = &self->view; + CHECK_RELEASED(self); + + if (view->ndim == 0) { + PyErr_SetString(PyExc_TypeError, "invalid lookup on 0-dim memory"); + return NULL; + } + + if (view->ndim == 1) { + Py_ssize_t n = view->shape[0]; + + if (start < 0) { + start = Py_MAX(start + n, 0); + } + + if (stop < 0) { + stop = Py_MAX(stop + n, 0); + } + + stop = Py_MIN(stop, n); + assert(stop >= 0); + assert(stop <= n); + + start = Py_MIN(start, stop); + assert(0 <= start); + assert(start <= stop); + + PyObject *obj = _PyObject_CAST(self); + for (Py_ssize_t index = start; index < stop; index++) { + // Note: while memoryviews can be mutated during iterations + // when calling the == operator, their shape cannot. As such, + // it is safe to assume that the index remains valid for the + // entire loop. + assert(index < n); + + PyObject *item = memory_item(obj, index); + if (item == NULL) { + return NULL; + } + if (item == value) { + Py_DECREF(item); + return PyLong_FromSsize_t(index); + } + int contained = PyObject_RichCompareBool(item, value, Py_EQ); + Py_DECREF(item); + if (contained > 0) { // more likely than 'contained < 0' + return PyLong_FromSsize_t(index); + } + else if (contained < 0) { + return NULL; + } + } + + PyErr_SetString(PyExc_ValueError, "memoryview.index(x): x not found"); + return NULL; + } + + PyErr_SetString(PyExc_NotImplementedError, + "multi-dimensional lookup is not implemented"); + return NULL; + +} + + /**************************************************************************/ /* Comparisons */ /**************************************************************************/ @@ -3284,6 +3370,7 @@ static PyMethodDef memory_methods[] = { MEMORYVIEW_CAST_METHODDEF MEMORYVIEW_TOREADONLY_METHODDEF MEMORYVIEW__FROM_FLAGS_METHODDEF + MEMORYVIEW_INDEX_METHODDEF {"__enter__", memory_enter, METH_NOARGS, NULL}, {"__exit__", memory_exit, METH_VARARGS, memory_exit_doc}, {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, From 3983527c3a6b389e373a233e514919555853ccb3 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 9 Dec 2024 20:55:20 -0800 Subject: [PATCH 32/55] gh-127651: Use __file__ in diagnostics if origin is missing (#127660) See the left hand side in https://github.com/python/cpython/pull/123929/files#diff-c22186367cbe20233e843261998dc027ae5f1f8c0d2e778abfa454ae74cc59deL2840-L2849 --------- Co-authored-by: Serhiy Storchaka --- Lib/test/test_import/__init__.py | 48 ++++++++++++++++++- ...-12-06-01-09-40.gh-issue-127651.80cm6j.rst | 1 + Python/ceval.c | 22 +++++++-- 3 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-01-09-40.gh-issue-127651.80cm6j.rst diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index c52b7f3e09bea1..83efbc1e25e77a 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -807,6 +807,50 @@ def test_issue105979(self): self.assertIn("Frozen object named 'x' is invalid", str(cm.exception)) + def test_frozen_module_from_import_error(self): + with self.assertRaises(ImportError) as cm: + from os import this_will_never_exist + self.assertIn( + f"cannot import name 'this_will_never_exist' from 'os' ({os.__file__})", + str(cm.exception), + ) + with self.assertRaises(ImportError) as cm: + from sys import this_will_never_exist + self.assertIn( + "cannot import name 'this_will_never_exist' from 'sys' (unknown location)", + str(cm.exception), + ) + + scripts = [ + """ +import os +os.__spec__.has_location = False +os.__file__ = [] +from os import this_will_never_exist +""", + """ +import os +os.__spec__.has_location = False +del os.__file__ +from os import this_will_never_exist +""", + """ +import os +os.__spec__.origin = [] +os.__file__ = [] +from os import this_will_never_exist +""" + ] + for script in scripts: + with self.subTest(script=script): + expected_error = ( + b"cannot import name 'this_will_never_exist' " + b"from 'os' (unknown location)" + ) + popen = script_helper.spawn_python("-c", script) + stdout, stderr = popen.communicate() + self.assertIn(expected_error, stdout) + def test_script_shadowing_stdlib(self): script_errors = [ ( @@ -1068,7 +1112,7 @@ class substr(str): except AttributeError as e: print(str(e)) -fractions.__spec__.origin = 0 +fractions.__spec__.origin = [] try: fractions.Fraction except AttributeError as e: @@ -1092,7 +1136,7 @@ class substr(str): except ImportError as e: print(str(e)) -fractions.__spec__.origin = 0 +fractions.__spec__.origin = [] try: from fractions import Fraction except ImportError as e: diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-01-09-40.gh-issue-127651.80cm6j.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-01-09-40.gh-issue-127651.80cm6j.rst new file mode 100644 index 00000000000000..92b18b082eb30e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-06-01-09-40.gh-issue-127651.80cm6j.rst @@ -0,0 +1 @@ +When raising :exc:`ImportError` for missing symbols in ``from`` imports, use ``__file__`` in the error message if ``__spec__.origin`` is not a location diff --git a/Python/ceval.c b/Python/ceval.c index 6795a160506231..5eda033eced628 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2859,6 +2859,20 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) } } + if (origin == NULL) { + // Fall back to __file__ for diagnostics if we don't have + // an origin that is a location + origin = PyModule_GetFilenameObject(v); + if (origin == NULL) { + if (!PyErr_ExceptionMatches(PyExc_SystemError)) { + goto done; + } + // PyModule_GetFilenameObject raised "module filename missing" + _PyErr_Clear(tstate); + } + assert(origin == NULL || PyUnicode_Check(origin)); + } + if (is_possibly_shadowing_stdlib) { assert(origin); errmsg = PyUnicode_FromFormat( @@ -2919,9 +2933,11 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) } done_with_errmsg: - /* NULL checks for errmsg, mod_name, origin done by PyErr_SetImportError. */ - _PyErr_SetImportErrorWithNameFrom(errmsg, mod_name, origin, name); - Py_DECREF(errmsg); + if (errmsg != NULL) { + /* NULL checks for mod_name and origin done by _PyErr_SetImportErrorWithNameFrom */ + _PyErr_SetImportErrorWithNameFrom(errmsg, mod_name, origin, name); + Py_DECREF(errmsg); + } done: Py_XDECREF(origin); From 2233c303e476496fc4c85a29a1429a7e4b1f707b Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Tue, 10 Dec 2024 07:40:54 +0000 Subject: [PATCH 33/55] gh-126775: make linecache.checkcache threadsafe and GC re-entrency safe (#126776) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Bartosz Sławecki --- Lib/linecache.py | 13 ++++++++----- .../2024-11-13-10-44-25.gh-issue-126775.a3ubjh.rst | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-13-10-44-25.gh-issue-126775.a3ubjh.rst diff --git a/Lib/linecache.py b/Lib/linecache.py index 4b38a0464d8747..8ba2df73d5a8fb 100644 --- a/Lib/linecache.py +++ b/Lib/linecache.py @@ -49,14 +49,17 @@ def checkcache(filename=None): (This is not checked upon each call!)""" if filename is None: - filenames = list(cache.keys()) - elif filename in cache: - filenames = [filename] + # get keys atomically + filenames = cache.copy().keys() else: - return + filenames = [filename] for filename in filenames: - entry = cache[filename] + try: + entry = cache[filename] + except KeyError: + continue + if len(entry) == 1: # lazy cache entry, leave it lazy. continue diff --git a/Misc/NEWS.d/next/Library/2024-11-13-10-44-25.gh-issue-126775.a3ubjh.rst b/Misc/NEWS.d/next/Library/2024-11-13-10-44-25.gh-issue-126775.a3ubjh.rst new file mode 100644 index 00000000000000..429fc2aa17bd42 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-13-10-44-25.gh-issue-126775.a3ubjh.rst @@ -0,0 +1 @@ +Make :func:`linecache.checkcache` thread safe and GC re-entrancy safe. From 212448b1623b45f19c5529595e081da72a6521a0 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 10 Dec 2024 09:44:15 +0200 Subject: [PATCH 34/55] gh-127718: Add colour to `test.regrtest` output (#127719) --- Doc/library/test.rst | 4 + Lib/test/libregrtest/main.py | 9 +- Lib/test/libregrtest/result.py | 50 +++++++---- Lib/test/libregrtest/results.py | 89 +++++++++++++------ Lib/test/libregrtest/single.py | 22 +++-- Lib/test/test_regrtest.py | 46 ++++++++++ ...-12-07-15-28-31.gh-issue-127718.9dpLfi.rst | 1 + 7 files changed, 168 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-07-15-28-31.gh-issue-127718.9dpLfi.rst diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 04d28aee0f8672..b5b6e442e218fd 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -192,6 +192,10 @@ top-level directory where Python was built. On Windows, executing :program:`rt.bat` from your :file:`PCbuild` directory will run all regression tests. +.. versionadded:: 3.14 + Output is colorized by default and can be + :ref:`controlled using environment variables `. + :mod:`test.support` --- Utilities for the Python test suite =========================================================== diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 49209b0cec756e..dcbcc6790c68d8 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -6,6 +6,7 @@ import sysconfig import time import trace +from _colorize import get_colors # type: ignore[import-not-found] from typing import NoReturn from test.support import os_helper, MS_WINDOWS, flush_std_streams @@ -270,6 +271,9 @@ def _rerun_failed_tests(self, runtests: RunTests) -> RunTests: return runtests def rerun_failed_tests(self, runtests: RunTests) -> None: + ansi = get_colors() + red, reset = ansi.BOLD_RED, ansi.RESET + if self.python_cmd: # Temp patch for https://github.com/python/cpython/issues/94052 self.log( @@ -284,7 +288,10 @@ def rerun_failed_tests(self, runtests: RunTests) -> None: rerun_runtests = self._rerun_failed_tests(runtests) if self.results.bad: - print(count(len(self.results.bad), 'test'), "failed again:") + print( + f"{red}{count(len(self.results.bad), 'test')} " + f"failed again:{reset}" + ) printlist(self.results.bad) self.display_result(rerun_runtests) diff --git a/Lib/test/libregrtest/result.py b/Lib/test/libregrtest/result.py index 7553efe5e8abeb..daf7624366ee20 100644 --- a/Lib/test/libregrtest/result.py +++ b/Lib/test/libregrtest/result.py @@ -1,5 +1,6 @@ import dataclasses import json +from _colorize import get_colors # type: ignore[import-not-found] from typing import Any from .utils import ( @@ -105,54 +106,71 @@ def is_failed(self, fail_env_changed: bool) -> bool: return State.is_failed(self.state) def _format_failed(self): + ansi = get_colors() + red, reset = ansi.BOLD_RED, ansi.RESET if self.errors and self.failures: le = len(self.errors) lf = len(self.failures) error_s = "error" + ("s" if le > 1 else "") failure_s = "failure" + ("s" if lf > 1 else "") - return f"{self.test_name} failed ({le} {error_s}, {lf} {failure_s})" + return ( + f"{red}{self.test_name} failed " + f"({le} {error_s}, {lf} {failure_s}){reset}" + ) if self.errors: le = len(self.errors) error_s = "error" + ("s" if le > 1 else "") - return f"{self.test_name} failed ({le} {error_s})" + return f"{red}{self.test_name} failed ({le} {error_s}){reset}" if self.failures: lf = len(self.failures) failure_s = "failure" + ("s" if lf > 1 else "") - return f"{self.test_name} failed ({lf} {failure_s})" + return f"{red}{self.test_name} failed ({lf} {failure_s}){reset}" - return f"{self.test_name} failed" + return f"{red}{self.test_name} failed{reset}" def __str__(self) -> str: + ansi = get_colors() + green = ansi.GREEN + red = ansi.BOLD_RED + reset = ansi.RESET + yellow = ansi.YELLOW + match self.state: case State.PASSED: - return f"{self.test_name} passed" + return f"{green}{self.test_name} passed{reset}" case State.FAILED: - return self._format_failed() + return f"{red}{self._format_failed()}{reset}" case State.SKIPPED: - return f"{self.test_name} skipped" + return f"{yellow}{self.test_name} skipped{reset}" case State.UNCAUGHT_EXC: - return f"{self.test_name} failed (uncaught exception)" + return ( + f"{red}{self.test_name} failed (uncaught exception){reset}" + ) case State.REFLEAK: - return f"{self.test_name} failed (reference leak)" + return f"{red}{self.test_name} failed (reference leak){reset}" case State.ENV_CHANGED: - return f"{self.test_name} failed (env changed)" + return f"{red}{self.test_name} failed (env changed){reset}" case State.RESOURCE_DENIED: - return f"{self.test_name} skipped (resource denied)" + return f"{yellow}{self.test_name} skipped (resource denied){reset}" case State.INTERRUPTED: - return f"{self.test_name} interrupted" + return f"{yellow}{self.test_name} interrupted{reset}" case State.WORKER_FAILED: - return f"{self.test_name} worker non-zero exit code" + return ( + f"{red}{self.test_name} worker non-zero exit code{reset}" + ) case State.WORKER_BUG: - return f"{self.test_name} worker bug" + return f"{red}{self.test_name} worker bug{reset}" case State.DID_NOT_RUN: - return f"{self.test_name} ran no tests" + return f"{yellow}{self.test_name} ran no tests{reset}" case State.TIMEOUT: assert self.duration is not None, "self.duration is None" return f"{self.test_name} timed out ({format_duration(self.duration)})" case _: - raise ValueError("unknown result state: {state!r}") + raise ValueError( + f"{red}unknown result state: {{state!r}}{reset}" + ) def has_meaningful_duration(self): return State.has_meaningful_duration(self.state) diff --git a/Lib/test/libregrtest/results.py b/Lib/test/libregrtest/results.py index 9eda926966dc7e..a35934fc2c9ca8 100644 --- a/Lib/test/libregrtest/results.py +++ b/Lib/test/libregrtest/results.py @@ -1,5 +1,6 @@ import sys import trace +from _colorize import get_colors # type: ignore[import-not-found] from typing import TYPE_CHECKING from .runtests import RunTests @@ -59,19 +60,24 @@ def no_tests_run(self) -> bool: def get_state(self, fail_env_changed: bool) -> str: state = [] + ansi = get_colors() + green = ansi.GREEN + red = ansi.BOLD_RED + reset = ansi.RESET + yellow = ansi.YELLOW if self.bad: - state.append("FAILURE") + state.append(f"{red}FAILURE{reset}") elif fail_env_changed and self.env_changed: - state.append("ENV CHANGED") + state.append(f"{yellow}ENV CHANGED{reset}") elif self.no_tests_run(): - state.append("NO TESTS RAN") + state.append(f"{yellow}NO TESTS RAN{reset}") if self.interrupted: - state.append("INTERRUPTED") + state.append(f"{yellow}INTERRUPTED{reset}") if self.worker_bug: - state.append("WORKER BUG") + state.append(f"{red}WORKER BUG{reset}") if not state: - state.append("SUCCESS") + state.append(f"{green}SUCCESS{reset}") return ', '.join(state) @@ -197,27 +203,51 @@ def write_junit(self, filename: StrPath) -> None: f.write(s) def display_result(self, tests: TestTuple, quiet: bool, print_slowest: bool) -> None: + ansi = get_colors() + green = ansi.GREEN + red = ansi.BOLD_RED + reset = ansi.RESET + yellow = ansi.YELLOW + if print_slowest: self.test_times.sort(reverse=True) print() - print("10 slowest tests:") + print(f"{yellow}10 slowest tests:{reset}") for test_time, test in self.test_times[:10]: - print("- %s: %s" % (test, format_duration(test_time))) + print(f"- {test}: {format_duration(test_time)}") all_tests = [] omitted = set(tests) - self.get_executed() # less important - all_tests.append((sorted(omitted), "test", "{} omitted:")) + all_tests.append( + (sorted(omitted), "test", f"{yellow}{{}} omitted:{reset}") + ) if not quiet: - all_tests.append((self.skipped, "test", "{} skipped:")) - all_tests.append((self.resource_denied, "test", "{} skipped (resource denied):")) - all_tests.append((self.run_no_tests, "test", "{} run no tests:")) + all_tests.append( + (self.skipped, "test", f"{yellow}{{}} skipped:{reset}") + ) + all_tests.append( + ( + self.resource_denied, + "test", + f"{yellow}{{}} skipped (resource denied):{reset}", + ) + ) + all_tests.append( + (self.run_no_tests, "test", f"{yellow}{{}} run no tests:{reset}") + ) # more important - all_tests.append((self.env_changed, "test", "{} altered the execution environment (env changed):")) - all_tests.append((self.rerun, "re-run test", "{}:")) - all_tests.append((self.bad, "test", "{} failed:")) + all_tests.append( + ( + self.env_changed, + "test", + f"{yellow}{{}} altered the execution environment (env changed):{reset}", + ) + ) + all_tests.append((self.rerun, "re-run test", f"{yellow}{{}}:{reset}")) + all_tests.append((self.bad, "test", f"{red}{{}} failed:{reset}")) for tests_list, count_text, title_format in all_tests: if tests_list: @@ -229,26 +259,29 @@ def display_result(self, tests: TestTuple, quiet: bool, print_slowest: bool) -> if self.good and not quiet: print() text = count(len(self.good), "test") - text = f"{text} OK." - if (self.is_all_good() and len(self.good) > 1): + text = f"{green}{text} OK.{reset}" + if self.is_all_good() and len(self.good) > 1: text = f"All {text}" print(text) if self.interrupted: print() - print("Test suite interrupted by signal SIGINT.") + print(f"{yellow}Test suite interrupted by signal SIGINT.{reset}") def display_summary(self, first_runtests: RunTests, filtered: bool) -> None: # Total tests + ansi = get_colors() + red, reset, yellow = ansi.RED, ansi.RESET, ansi.YELLOW + stats = self.stats text = f'run={stats.tests_run:,}' if filtered: text = f"{text} (filtered)" report = [text] if stats.failures: - report.append(f'failures={stats.failures:,}') + report.append(f'{red}failures={stats.failures:,}{reset}') if stats.skipped: - report.append(f'skipped={stats.skipped:,}') + report.append(f'{yellow}skipped={stats.skipped:,}{reset}') print(f"Total tests: {' '.join(report)}") # Total test files @@ -263,14 +296,14 @@ def display_summary(self, first_runtests: RunTests, filtered: bool) -> None: if filtered: text = f"{text} (filtered)" report = [text] - for name, tests in ( - ('failed', self.bad), - ('env_changed', self.env_changed), - ('skipped', self.skipped), - ('resource_denied', self.resource_denied), - ('rerun', self.rerun), - ('run_no_tests', self.run_no_tests), + for name, tests, color in ( + ('failed', self.bad, red), + ('env_changed', self.env_changed, yellow), + ('skipped', self.skipped, yellow), + ('resource_denied', self.resource_denied, yellow), + ('rerun', self.rerun, yellow), + ('run_no_tests', self.run_no_tests, yellow), ): if tests: - report.append(f'{name}={len(tests)}') + report.append(f'{color}{name}={len(tests)}{reset}') print(f"Total test files: {' '.join(report)}") diff --git a/Lib/test/libregrtest/single.py b/Lib/test/libregrtest/single.py index 17323e7f9cf730..0e174f82abed28 100644 --- a/Lib/test/libregrtest/single.py +++ b/Lib/test/libregrtest/single.py @@ -7,6 +7,7 @@ import traceback import unittest +from _colorize import get_colors # type: ignore[import-not-found] from test import support from test.support import threading_helper @@ -161,6 +162,8 @@ def test_func(): def _runtest_env_changed_exc(result: TestResult, runtests: RunTests, display_failure: bool = True) -> None: # Handle exceptions, detect environment changes. + ansi = get_colors() + red, reset, yellow = ansi.RED, ansi.RESET, ansi.YELLOW # Reset the environment_altered flag to detect if a test altered # the environment @@ -181,18 +184,18 @@ def _runtest_env_changed_exc(result: TestResult, runtests: RunTests, _load_run_test(result, runtests) except support.ResourceDenied as exc: if not quiet and not pgo: - print(f"{test_name} skipped -- {exc}", flush=True) + print(f"{yellow}{test_name} skipped -- {exc}{reset}", flush=True) result.state = State.RESOURCE_DENIED return except unittest.SkipTest as exc: if not quiet and not pgo: - print(f"{test_name} skipped -- {exc}", flush=True) + print(f"{yellow}{test_name} skipped -- {exc}{reset}", flush=True) result.state = State.SKIPPED return except support.TestFailedWithDetails as exc: - msg = f"test {test_name} failed" + msg = f"{red}test {test_name} failed{reset}" if display_failure: - msg = f"{msg} -- {exc}" + msg = f"{red}{msg} -- {exc}{reset}" print(msg, file=sys.stderr, flush=True) result.state = State.FAILED result.errors = exc.errors @@ -200,9 +203,9 @@ def _runtest_env_changed_exc(result: TestResult, runtests: RunTests, result.stats = exc.stats return except support.TestFailed as exc: - msg = f"test {test_name} failed" + msg = f"{red}test {test_name} failed{reset}" if display_failure: - msg = f"{msg} -- {exc}" + msg = f"{red}{msg} -- {exc}{reset}" print(msg, file=sys.stderr, flush=True) result.state = State.FAILED result.stats = exc.stats @@ -217,7 +220,7 @@ def _runtest_env_changed_exc(result: TestResult, runtests: RunTests, except: if not pgo: msg = traceback.format_exc() - print(f"test {test_name} crashed -- {msg}", + print(f"{red}test {test_name} crashed -- {msg}{reset}", file=sys.stderr, flush=True) result.state = State.UNCAUGHT_EXC return @@ -300,6 +303,9 @@ def run_single_test(test_name: TestName, runtests: RunTests) -> TestResult: If runtests.use_junit, xml_data is a list containing each generated testsuite element. """ + ansi = get_colors() + red, reset, yellow = ansi.BOLD_RED, ansi.RESET, ansi.YELLOW + start_time = time.perf_counter() result = TestResult(test_name) pgo = runtests.pgo @@ -308,7 +314,7 @@ def run_single_test(test_name: TestName, runtests: RunTests) -> TestResult: except: if not pgo: msg = traceback.format_exc() - print(f"test {test_name} crashed -- {msg}", + print(f"{red}test {test_name} crashed -- {msg}{reset}", file=sys.stderr, flush=True) result.state = State.UNCAUGHT_EXC diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 0ab7a23aca1df8..ab46ccbf004a3a 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -4,6 +4,7 @@ Note: test_regrtest cannot be run twice in parallel. """ +import _colorize import contextlib import dataclasses import glob @@ -21,6 +22,7 @@ import tempfile import textwrap import unittest +import unittest.mock from xml.etree import ElementTree from test import support @@ -2487,5 +2489,49 @@ def test_sanitize_xml(self): 'valid t\xe9xt \u20ac') +from test.libregrtest.results import TestResults + + +class TestColorized(unittest.TestCase): + def test_test_result_get_state(self): + # Arrange + green = _colorize.ANSIColors.GREEN + red = _colorize.ANSIColors.BOLD_RED + reset = _colorize.ANSIColors.RESET + yellow = _colorize.ANSIColors.YELLOW + + good_results = TestResults() + good_results.good = ["good1", "good2"] + bad_results = TestResults() + bad_results.bad = ["bad1", "bad2"] + no_results = TestResults() + no_results.bad = [] + interrupted_results = TestResults() + interrupted_results.interrupted = True + interrupted_worker_bug = TestResults() + interrupted_worker_bug.interrupted = True + interrupted_worker_bug.worker_bug = True + + for results, expected in ( + (good_results, f"{green}SUCCESS{reset}"), + (bad_results, f"{red}FAILURE{reset}"), + (no_results, f"{yellow}NO TESTS RAN{reset}"), + (interrupted_results, f"{yellow}INTERRUPTED{reset}"), + ( + interrupted_worker_bug, + f"{yellow}INTERRUPTED{reset}, {red}WORKER BUG{reset}", + ), + ): + with self.subTest(results=results, expected=expected): + # Act + with unittest.mock.patch( + "_colorize.can_colorize", return_value=True + ): + result = results.get_state(fail_env_changed=False) + + # Assert + self.assertEqual(result, expected) + + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-12-07-15-28-31.gh-issue-127718.9dpLfi.rst b/Misc/NEWS.d/next/Library/2024-12-07-15-28-31.gh-issue-127718.9dpLfi.rst new file mode 100644 index 00000000000000..6c1b7bef610e8c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-07-15-28-31.gh-issue-127718.9dpLfi.rst @@ -0,0 +1 @@ +Add colour to :mod:`test.regrtest` output. Patch by Hugo van Kemenade. From 050d59bd1765de417bf4ec8b5c3cbdac65695f5e Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Tue, 10 Dec 2024 02:17:25 -0600 Subject: [PATCH 35/55] add help blurb for "extralargefile" (#127710) --- Lib/test/libregrtest/cmdline.py | 42 +++++++++++++++++---------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py index 0c94fcc1907071..bf9a71efbdbff9 100644 --- a/Lib/test/libregrtest/cmdline.py +++ b/Lib/test/libregrtest/cmdline.py @@ -87,38 +87,40 @@ The argument is a comma-separated list of words indicating the resources to test. Currently only the following are defined: - all - Enable all special resources. + all - Enable all special resources. - none - Disable all special resources (this is the default). + none - Disable all special resources (this is the default). - audio - Tests that use the audio device. (There are known - cases of broken audio drivers that can crash Python or - even the Linux kernel.) + audio - Tests that use the audio device. (There are known + cases of broken audio drivers that can crash Python or + even the Linux kernel.) - curses - Tests that use curses and will modify the terminal's - state and output modes. + curses - Tests that use curses and will modify the terminal's + state and output modes. - largefile - It is okay to run some test that may create huge - files. These tests can take a long time and may - consume >2 GiB of disk space temporarily. + largefile - It is okay to run some test that may create huge + files. These tests can take a long time and may + consume >2 GiB of disk space temporarily. - network - It is okay to run tests that use external network - resource, e.g. testing SSL support for sockets. + extralargefile - Like 'largefile', but even larger (and slower). - decimal - Test the decimal module against a large suite that - verifies compliance with standards. + network - It is okay to run tests that use external network + resource, e.g. testing SSL support for sockets. - cpu - Used for certain CPU-heavy tests. + decimal - Test the decimal module against a large suite that + verifies compliance with standards. - walltime - Long running but not CPU-bound tests. + cpu - Used for certain CPU-heavy tests. - subprocess Run all tests for the subprocess module. + walltime - Long running but not CPU-bound tests. - urlfetch - It is okay to download files required on testing. + subprocess Run all tests for the subprocess module. - gui - Run tests that require a running GUI. + urlfetch - It is okay to download files required on testing. - tzdata - Run tests that require timezone data. + gui - Run tests that require a running GUI. + + tzdata - Run tests that require timezone data. To enable all resources except one, use '-uall,-'. For example, to run all the tests except for the gui tests, give the From 4331832db02ff4a7598dcdd99cae31087173dce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:12:33 +0100 Subject: [PATCH 36/55] gh-125420: implement `Sequence.count` API on `memoryview` objects (#125443) --- Doc/library/stdtypes.rst | 8 ++- Lib/test/test_memoryview.py | 28 +++++++++++ ...-10-14-12-34-51.gh-issue-125420.jABXoZ.rst | 2 + Objects/clinic/memoryobject.c.h | 11 +++- Objects/memoryobject.c | 50 +++++++++++++++++++ 5 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-12-34-51.gh-issue-125420.jABXoZ.rst diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index f4e635dd61e3dc..d5f7714ced6873 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4149,7 +4149,13 @@ copying. .. versionchanged:: 3.5 The source format is no longer restricted when casting to a byte view. - .. method:: index(value, start=0, stop=sys.maxsize, /) + .. method:: count(value, /) + + Count the number of occurrences of *value*. + + .. versionadded:: next + + .. method:: index(value, start=0, stop=sys.maxsize, /) Return the index of the first occurrence of *value* (at or after index *start* and before index *stop*). diff --git a/Lib/test/test_memoryview.py b/Lib/test/test_memoryview.py index 8f07828c19790d..96320ebef6467a 100644 --- a/Lib/test/test_memoryview.py +++ b/Lib/test/test_memoryview.py @@ -90,6 +90,22 @@ def test_iter(self): m = self._view(b) self.assertEqual(list(m), [m[i] for i in range(len(m))]) + def test_count(self): + for tp in self._types: + b = tp(self._source) + m = self._view(b) + l = m.tolist() + for ch in list(m): + self.assertEqual(m.count(ch), l.count(ch)) + + b = tp((b'a' * 5) + (b'c' * 3)) + m = self._view(b) # may be sliced + l = m.tolist() + with self.subTest('count', buffer=b): + self.assertEqual(m.count(ord('a')), l.count(ord('a'))) + self.assertEqual(m.count(ord('b')), l.count(ord('b'))) + self.assertEqual(m.count(ord('c')), l.count(ord('c'))) + def test_setitem_readonly(self): if not self.ro_type: self.skipTest("no read-only type to test") @@ -464,6 +480,18 @@ def _view(self, obj): def _check_contents(self, tp, obj, contents): self.assertEqual(obj, tp(contents)) + def test_count(self): + super().test_count() + for tp in self._types: + b = tp((b'a' * 5) + (b'c' * 3)) + m = self._view(b) # should not be sliced + self.assertEqual(len(b), len(m)) + with self.subTest('count', buffer=b): + self.assertEqual(m.count(ord('a')), 5) + self.assertEqual(m.count(ord('b')), 0) + self.assertEqual(m.count(ord('c')), 3) + + class BaseMemorySliceTests: source_bytes = b"XabcdefY" diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-12-34-51.gh-issue-125420.jABXoZ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-12-34-51.gh-issue-125420.jABXoZ.rst new file mode 100644 index 00000000000000..ef120802b5dc59 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-14-12-34-51.gh-issue-125420.jABXoZ.rst @@ -0,0 +1,2 @@ +Add :meth:`memoryview.count` to :class:`memoryview` objects. Patch by +Bénédikt Tran. diff --git a/Objects/clinic/memoryobject.c.h b/Objects/clinic/memoryobject.c.h index 9b8a23317f0da0..a6cf1f431a15b0 100644 --- a/Objects/clinic/memoryobject.c.h +++ b/Objects/clinic/memoryobject.c.h @@ -419,6 +419,15 @@ memoryview_hex(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nargs return return_value; } +PyDoc_STRVAR(memoryview_count__doc__, +"count($self, value, /)\n" +"--\n" +"\n" +"Count the number of occurrences of a value."); + +#define MEMORYVIEW_COUNT_METHODDEF \ + {"count", (PyCFunction)memoryview_count, METH_O, memoryview_count__doc__}, + PyDoc_STRVAR(memoryview_index__doc__, "index($self, value, start=0, stop=sys.maxsize, /)\n" "--\n" @@ -464,4 +473,4 @@ memoryview_index(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nar exit: return return_value; } -/*[clinic end generated code: output=2742d371dba7314f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=132893ef5f67ad73 input=a9049054013a1b77]*/ diff --git a/Objects/memoryobject.c b/Objects/memoryobject.c index 345aa79c328125..afe2dcb127adb4 100644 --- a/Objects/memoryobject.c +++ b/Objects/memoryobject.c @@ -2748,6 +2748,55 @@ static PySequenceMethods memory_as_sequence = { }; +/****************************************************************************/ +/* Counting */ +/****************************************************************************/ + +/*[clinic input] +memoryview.count + + value: object + / + +Count the number of occurrences of a value. +[clinic start generated code]*/ + +static PyObject * +memoryview_count(PyMemoryViewObject *self, PyObject *value) +/*[clinic end generated code: output=e2c255a8d54eaa12 input=e3036ce1ed7d1823]*/ +{ + PyObject *iter = PyObject_GetIter(_PyObject_CAST(self)); + if (iter == NULL) { + return NULL; + } + + Py_ssize_t count = 0; + PyObject *item = NULL; + while (PyIter_NextItem(iter, &item)) { + if (item == NULL) { + Py_DECREF(iter); + return NULL; + } + if (item == value) { + Py_DECREF(item); + count++; // no overflow since count <= len(mv) <= PY_SSIZE_T_MAX + continue; + } + int contained = PyObject_RichCompareBool(item, value, Py_EQ); + Py_DECREF(item); + if (contained > 0) { // more likely than 'contained < 0' + count++; // no overflow since count <= len(mv) <= PY_SSIZE_T_MAX + } + else if (contained < 0) { + Py_DECREF(iter); + return NULL; + } + } + Py_DECREF(iter); + return PyLong_FromSsize_t(count); +} + + /**************************************************************************/ /* Lookup */ /**************************************************************************/ @@ -3370,6 +3419,7 @@ static PyMethodDef memory_methods[] = { MEMORYVIEW_CAST_METHODDEF MEMORYVIEW_TOREADONLY_METHODDEF MEMORYVIEW__FROM_FLAGS_METHODDEF + MEMORYVIEW_COUNT_METHODDEF MEMORYVIEW_INDEX_METHODDEF {"__enter__", memory_enter, METH_NOARGS, NULL}, {"__exit__", memory_exit, METH_VARARGS, memory_exit_doc}, From 8dbdbad6e085564ec20109fc8277fa54b3fcc2db Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Tue, 10 Dec 2024 19:22:37 +0900 Subject: [PATCH 37/55] gh-101100: Fix sphinx warnings in `whatsnew/3.0.rst` (#127662) --- Doc/library/xmlrpc.rst | 3 ++ Doc/tools/.nitignore | 1 - Doc/whatsnew/3.0.rst | 112 ++++++++++++++++++++--------------------- 3 files changed, 59 insertions(+), 57 deletions(-) diff --git a/Doc/library/xmlrpc.rst b/Doc/library/xmlrpc.rst index 5f0a2cf68d01f9..a93d08f78cfba7 100644 --- a/Doc/library/xmlrpc.rst +++ b/Doc/library/xmlrpc.rst @@ -1,6 +1,9 @@ :mod:`!xmlrpc` --- XMLRPC server and client modules =================================================== +.. module:: xmlrpc + :synopsis: Server and client modules implementing XML-RPC. + XML-RPC is a Remote Procedure Call method that uses XML passed via HTTP as a transport. With it, a client can call methods with parameters on a remote server (the server is named by a URI) and get back structured data. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 39d1f5975e331c..7d50aec56a9bf7 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -76,7 +76,6 @@ Doc/whatsnew/2.4.rst Doc/whatsnew/2.5.rst Doc/whatsnew/2.6.rst Doc/whatsnew/2.7.rst -Doc/whatsnew/3.0.rst Doc/whatsnew/3.3.rst Doc/whatsnew/3.4.rst Doc/whatsnew/3.5.rst diff --git a/Doc/whatsnew/3.0.rst b/Doc/whatsnew/3.0.rst index d97f5fdd9eaa4a..6e1fda22ed203a 100644 --- a/Doc/whatsnew/3.0.rst +++ b/Doc/whatsnew/3.0.rst @@ -150,8 +150,8 @@ Some well-known APIs no longer return lists: sorted(d)`` instead (this works in Python 2.5 too and is just as efficient). -* Also, the :meth:`dict.iterkeys`, :meth:`dict.iteritems` and - :meth:`dict.itervalues` methods are no longer supported. +* Also, the :meth:`!dict.iterkeys`, :meth:`!dict.iteritems` and + :meth:`!dict.itervalues` methods are no longer supported. * :func:`map` and :func:`filter` return iterators. If you really need a list and the input sequences are all of equal length, a quick @@ -170,7 +170,7 @@ Some well-known APIs no longer return lists: :func:`itertools.zip_longest`, e.g. ``map(func, *sequences)`` becomes ``list(map(func, itertools.zip_longest(*sequences)))``. -* :func:`range` now behaves like :func:`xrange` used to behave, except +* :func:`range` now behaves like :func:`!xrange` used to behave, except it works with values of arbitrary size. The latter no longer exists. @@ -192,33 +192,33 @@ Python 3.0 has simplified the rules for ordering comparisons: operators: objects of different incomparable types always compare unequal to each other. -* :meth:`builtin.sorted` and :meth:`list.sort` no longer accept the +* :meth:`sorted` and :meth:`list.sort` no longer accept the *cmp* argument providing a comparison function. Use the *key* argument instead. N.B. the *key* and *reverse* arguments are now "keyword-only". -* The :func:`cmp` function should be treated as gone, and the :meth:`__cmp__` - special method is no longer supported. Use :meth:`__lt__` for sorting, - :meth:`__eq__` with :meth:`__hash__`, and other rich comparisons as needed. - (If you really need the :func:`cmp` functionality, you could use the +* The :func:`!cmp` function should be treated as gone, and the :meth:`!__cmp__` + special method is no longer supported. Use :meth:`~object.__lt__` for sorting, + :meth:`~object.__eq__` with :meth:`~object.__hash__`, and other rich comparisons as needed. + (If you really need the :func:`!cmp` functionality, you could use the expression ``(a > b) - (a < b)`` as the equivalent for ``cmp(a, b)``.) Integers -------- -* :pep:`237`: Essentially, :class:`long` renamed to :class:`int`. +* :pep:`237`: Essentially, :class:`!long` renamed to :class:`int`. That is, there is only one built-in integral type, named - :class:`int`; but it behaves mostly like the old :class:`long` type. + :class:`int`; but it behaves mostly like the old :class:`!long` type. * :pep:`238`: An expression like ``1/2`` returns a float. Use ``1//2`` to get the truncating behavior. (The latter syntax has existed for years, at least since Python 2.2.) -* The :data:`sys.maxint` constant was removed, since there is no +* The :data:`!sys.maxint` constant was removed, since there is no longer a limit to the value of integers. However, :data:`sys.maxsize` can be used as an integer larger than any practical list or string index. It conforms to the implementation's "natural" integer size - and is typically the same as :data:`sys.maxint` in previous releases + and is typically the same as :data:`!sys.maxint` in previous releases on the same platform (assuming the same build options). * The :func:`repr` of a long integer doesn't include the trailing ``L`` @@ -251,7 +251,7 @@ changed. that uses Unicode, encodings or binary data most likely has to change. The change is for the better, as in the 2.x world there were numerous bugs having to do with mixing encoded and unencoded - text. To be prepared in Python 2.x, start using :class:`unicode` + text. To be prepared in Python 2.x, start using :class:`!unicode` for all unencoded text, and :class:`str` for binary or encoded data only. Then the ``2to3`` tool will do most of the work for you. @@ -269,7 +269,7 @@ changed. separate *mutable* type to hold buffered binary data, :class:`bytearray`. Nearly all APIs that accept :class:`bytes` also accept :class:`bytearray`. The mutable API is based on - :class:`collections.MutableSequence`. + :class:`collections.MutableSequence `. * All backslashes in raw string literals are interpreted literally. This means that ``'\U'`` and ``'\u'`` escapes in raw strings are not @@ -278,11 +278,11 @@ changed. single "euro" character. (Of course, this change only affects raw string literals; the euro character is ``'\u20ac'`` in Python 3.0.) -* The built-in :class:`basestring` abstract type was removed. Use +* The built-in :class:`!basestring` abstract type was removed. Use :class:`str` instead. The :class:`str` and :class:`bytes` types don't have functionality enough in common to warrant a shared base class. The ``2to3`` tool (see below) replaces every occurrence of - :class:`basestring` with :class:`str`. + :class:`!basestring` with :class:`str`. * Files opened as text files (still the default mode for :func:`open`) always use an encoding to map between strings (in memory) and bytes @@ -428,7 +428,7 @@ Changed Syntax class C(metaclass=M): ... - The module-global :data:`__metaclass__` variable is no longer + The module-global :data:`!__metaclass__` variable is no longer supported. (It was a crutch to make it easier to default to new-style classes without deriving every class from :class:`object`.) @@ -522,19 +522,19 @@ consulted for longer descriptions. *encoding*, *errors*, *newline* and *closefd*. Also note that an invalid *mode* argument now raises :exc:`ValueError`, not :exc:`IOError`. The binary file object underlying a text file - object can be accessed as :attr:`f.buffer` (but beware that the + object can be accessed as :attr:`!f.buffer` (but beware that the text object maintains a buffer of itself in order to speed up the encoding and decoding operations). -* :ref:`pep-3118`. The old builtin :func:`buffer` is now really gone; +* :ref:`pep-3118`. The old builtin :func:`!buffer` is now really gone; the new builtin :func:`memoryview` provides (mostly) similar functionality. * :ref:`pep-3119`. The :mod:`abc` module and the ABCs defined in the :mod:`collections` module plays a somewhat more prominent role in the language now, and built-in collection types like :class:`dict` - and :class:`list` conform to the :class:`collections.MutableMapping` - and :class:`collections.MutableSequence` ABCs, respectively. + and :class:`list` conform to the :class:`collections.MutableMapping ` + and :class:`collections.MutableSequence ` ABCs, respectively. * :ref:`pep-3127`. As mentioned above, the new octal literal notation is the only one supported, and binary literals have been @@ -612,7 +612,7 @@ review: :mod:`!CGIHTTPServer`, :mod:`!SimpleHTTPServer`, :mod:`!Cookie`, :mod:`!cookielib`). - * :mod:`tkinter` (all :mod:`Tkinter`-related modules except + * :mod:`tkinter` (all ``Tkinter``-related modules except :mod:`turtle`). The target audience of :mod:`turtle` doesn't really care about :mod:`tkinter`. Also note that as of Python 2.6, the functionality of :mod:`turtle` has been greatly enhanced. @@ -628,47 +628,47 @@ Some other changes to standard library modules, not covered by * Killed :mod:`!sets`. Use the built-in :func:`set` class. -* Cleanup of the :mod:`sys` module: removed :func:`sys.exitfunc`, - :func:`sys.exc_clear`, :data:`sys.exc_type`, :data:`sys.exc_value`, - :data:`sys.exc_traceback`. (Note that :data:`sys.last_type` +* Cleanup of the :mod:`sys` module: removed :func:`!sys.exitfunc`, + :func:`!sys.exc_clear`, :data:`!sys.exc_type`, :data:`!sys.exc_value`, + :data:`!sys.exc_traceback`. (Note that :data:`sys.last_type` etc. remain.) -* Cleanup of the :class:`array.array` type: the :meth:`read` and - :meth:`write` methods are gone; use :meth:`fromfile` and - :meth:`tofile` instead. Also, the ``'c'`` typecode for array is +* Cleanup of the :class:`array.array` type: the :meth:`!read` and + :meth:`!write` methods are gone; use :meth:`~array.array.fromfile` and + :meth:`~array.array.tofile` instead. Also, the ``'c'`` typecode for array is gone -- use either ``'b'`` for bytes or ``'u'`` for Unicode characters. * Cleanup of the :mod:`operator` module: removed - :func:`sequenceIncludes` and :func:`isCallable`. + :func:`!sequenceIncludes` and :func:`!isCallable`. * Cleanup of the :mod:`!thread` module: :func:`!acquire_lock` and :func:`!release_lock` are gone; use :meth:`~threading.Lock.acquire` and :meth:`~threading.Lock.release` instead. -* Cleanup of the :mod:`random` module: removed the :func:`jumpahead` API. +* Cleanup of the :mod:`random` module: removed the :func:`!jumpahead` API. * The :mod:`!new` module is gone. -* The functions :func:`os.tmpnam`, :func:`os.tempnam` and - :func:`os.tmpfile` have been removed in favor of the :mod:`tempfile` +* The functions :func:`!os.tmpnam`, :func:`!os.tempnam` and + :func:`!os.tmpfile` have been removed in favor of the :mod:`tempfile` module. * The :mod:`tokenize` module has been changed to work with bytes. The main entry point is now :func:`tokenize.tokenize`, instead of generate_tokens. -* :data:`string.letters` and its friends (:data:`string.lowercase` and - :data:`string.uppercase`) are gone. Use +* :data:`!string.letters` and its friends (:data:`!string.lowercase` and + :data:`!string.uppercase`) are gone. Use :data:`string.ascii_letters` etc. instead. (The reason for the - removal is that :data:`string.letters` and friends had + removal is that :data:`!string.letters` and friends had locale-specific behavior, which is a bad idea for such attractively named global "constants".) -* Renamed module :mod:`__builtin__` to :mod:`builtins` (removing the - underscores, adding an 's'). The :data:`__builtins__` variable +* Renamed module :mod:`!__builtin__` to :mod:`builtins` (removing the + underscores, adding an 's'). The :data:`!__builtins__` variable found in most global namespaces is unchanged. To modify a builtin, - you should use :mod:`builtins`, not :data:`__builtins__`! + you should use :mod:`builtins`, not :data:`!__builtins__`! :pep:`3101`: A New Approach To String Formatting @@ -702,9 +702,9 @@ new powerful features added: idiom for handling all exceptions except for this latter category is to use :keyword:`except` :exc:`Exception`. -* :exc:`StandardError` was removed. +* :exc:`!StandardError` was removed. -* Exceptions no longer behave as sequences. Use the :attr:`args` +* Exceptions no longer behave as sequences. Use the :attr:`~BaseException.args` attribute instead. * :pep:`3109`: Raising exceptions. You must now use :samp:`raise @@ -765,20 +765,20 @@ Operators And Special Methods When referencing a method as a class attribute, you now get a plain function object. -* :meth:`__getslice__`, :meth:`__setslice__` and :meth:`__delslice__` +* :meth:`!__getslice__`, :meth:`!__setslice__` and :meth:`!__delslice__` were killed. The syntax ``a[i:j]`` now translates to - ``a.__getitem__(slice(i, j))`` (or :meth:`__setitem__` or - :meth:`__delitem__`, when used as an assignment or deletion target, + ``a.__getitem__(slice(i, j))`` (or :meth:`~object.__setitem__` or + :meth:`~object.__delitem__`, when used as an assignment or deletion target, respectively). * :pep:`3114`: the standard :meth:`next` method has been renamed to :meth:`~iterator.__next__`. -* The :meth:`__oct__` and :meth:`__hex__` special methods are removed - -- :func:`oct` and :func:`hex` use :meth:`__index__` now to convert +* The :meth:`!__oct__` and :meth:`!__hex__` special methods are removed + -- :func:`oct` and :func:`hex` use :meth:`~object.__index__` now to convert the argument to an integer. -* Removed support for :attr:`__members__` and :attr:`__methods__`. +* Removed support for :attr:`!__members__` and :attr:`!__methods__`. * The function attributes named :attr:`!func_X` have been renamed to use the :attr:`!__X__` form, freeing up these names in the function @@ -802,7 +802,7 @@ Builtins instance will automatically be chosen. With arguments, the behavior of :func:`super` is unchanged. -* :pep:`3111`: :func:`raw_input` was renamed to :func:`input`. That +* :pep:`3111`: :func:`!raw_input` was renamed to :func:`input`. That is, the new :func:`input` function reads a line from :data:`sys.stdin` and returns it with the trailing newline stripped. It raises :exc:`EOFError` if the input is terminated prematurely. @@ -820,31 +820,31 @@ Builtins argument and a value of the same type as ``x`` when called with two arguments. -* Moved :func:`intern` to :func:`sys.intern`. +* Moved :func:`!intern` to :func:`sys.intern`. -* Removed: :func:`apply`. Instead of ``apply(f, args)`` use +* Removed: :func:`!apply`. Instead of ``apply(f, args)`` use ``f(*args)``. * Removed :func:`callable`. Instead of ``callable(f)`` you can use - ``isinstance(f, collections.Callable)``. The :func:`operator.isCallable` + ``isinstance(f, collections.Callable)``. The :func:`!operator.isCallable` function is also gone. -* Removed :func:`coerce`. This function no longer serves a purpose +* Removed :func:`!coerce`. This function no longer serves a purpose now that classic classes are gone. -* Removed :func:`execfile`. Instead of ``execfile(fn)`` use +* Removed :func:`!execfile`. Instead of ``execfile(fn)`` use ``exec(open(fn).read())``. -* Removed the :class:`file` type. Use :func:`open`. There are now several +* Removed the :class:`!file` type. Use :func:`open`. There are now several different kinds of streams that open can return in the :mod:`io` module. -* Removed :func:`reduce`. Use :func:`functools.reduce` if you really +* Removed :func:`!reduce`. Use :func:`functools.reduce` if you really need it; however, 99 percent of the time an explicit :keyword:`for` loop is more readable. -* Removed :func:`reload`. Use :func:`!imp.reload`. +* Removed :func:`!reload`. Use :func:`!imp.reload`. -* Removed. :meth:`dict.has_key` -- use the :keyword:`in` operator +* Removed. :meth:`!dict.has_key` -- use the :keyword:`in` operator instead. .. ====================================================================== From ae31df354d02e12bf656954c5c72380d96c1dc0e Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 10 Dec 2024 12:51:12 +0200 Subject: [PATCH 38/55] Add zizmor to pre-commit and fix most findings (#127749) Co-authored-by: Alex Waygood --- .github/workflows/build.yml | 11 ++++++++++- .github/workflows/documentation-links.yml | 6 +++--- .github/workflows/jit.yml | 6 ++++++ .github/workflows/lint.yml | 2 ++ .github/workflows/mypy.yml | 2 ++ .github/workflows/require-pr-label.yml | 10 ++++++---- .github/workflows/reusable-change-detection.yml | 2 ++ .github/workflows/reusable-docs.yml | 14 ++++++++++---- .github/workflows/reusable-macos.yml | 2 ++ .github/workflows/reusable-tsan.yml | 9 +++++++-- .github/workflows/reusable-ubuntu.yml | 4 +++- .github/workflows/reusable-wasi.yml | 12 +++++++----- .github/workflows/reusable-windows-msi.yml | 5 ++++- .github/workflows/reusable-windows.yml | 10 ++++++++-- .github/workflows/stale.yml | 5 ++--- .github/workflows/verify-ensurepip-wheels.yml | 2 ++ .github/zizmor.yml | 6 ++++++ .pre-commit-config.yaml | 9 +++++++-- 18 files changed, 89 insertions(+), 28 deletions(-) create mode 100644 .github/zizmor.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9b2f19fd6bcf54..8787402ccc4423 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -58,6 +58,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 1 + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Check Autoconf and aclocal versions @@ -94,6 +95,8 @@ jobs: if: needs.check_source.outputs.run_tests == 'true' steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3.x' @@ -268,6 +271,8 @@ jobs: LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache @@ -328,6 +333,8 @@ jobs: PYTHONSTRICTEXTENSIONBUILD: 1 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install Dependencies @@ -411,7 +418,7 @@ jobs: # # (GH-104097) test_sysconfig is skipped because it has tests that are # failing when executed from inside a virtual environment. - ${{ env.VENV_PYTHON }} -m test \ + "${VENV_PYTHON}" -m test \ -W \ -o \ -j4 \ @@ -446,6 +453,8 @@ jobs: ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache diff --git a/.github/workflows/documentation-links.yml b/.github/workflows/documentation-links.yml index 43a7afec73884e..fdb4b9aa29a7c8 100644 --- a/.github/workflows/documentation-links.yml +++ b/.github/workflows/documentation-links.yml @@ -10,9 +10,6 @@ on: - 'Doc/**' - '.github/workflows/doc.yml' -permissions: - pull-requests: write - concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true @@ -20,6 +17,9 @@ concurrency: jobs: documentation-links: runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: - uses: readthedocs/actions/preview@v1 with: diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index ee30cf5786d55b..9b84998a55666d 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -32,6 +32,8 @@ jobs: timeout-minutes: 90 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Build tier two interpreter run: | ./configure --enable-experimental-jit=interpreter --with-pydebug @@ -85,6 +87,8 @@ jobs: runner: ${{ github.repository_owner == 'python' && 'ubuntu-24.04-aarch64' || 'ubuntu-24.04' }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3.11' @@ -138,6 +142,8 @@ jobs: - 19 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3.11' diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ccde03f91983df..d74ce8fcc256dc 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,6 +20,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: "3.x" diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index e5b05302b5ac27..5dfa8d7bcafd78 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -51,6 +51,8 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: "3.13" diff --git a/.github/workflows/require-pr-label.yml b/.github/workflows/require-pr-label.yml index bbedd22cc6d189..0a6277c779ff67 100644 --- a/.github/workflows/require-pr-label.yml +++ b/.github/workflows/require-pr-label.yml @@ -4,15 +4,14 @@ on: pull_request: types: [opened, reopened, labeled, unlabeled, synchronize] -permissions: - issues: write - pull-requests: write - jobs: label-dnm: name: DO-NOT-MERGE if: github.repository_owner == 'python' runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write timeout-minutes: 10 steps: @@ -28,6 +27,9 @@ jobs: name: Unresolved review if: github.repository_owner == 'python' runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write timeout-minutes: 10 steps: diff --git a/.github/workflows/reusable-change-detection.yml b/.github/workflows/reusable-change-detection.yml index 1a6fd33186840c..964bd87e815f42 100644 --- a/.github/workflows/reusable-change-detection.yml +++ b/.github/workflows/reusable-change-detection.yml @@ -61,6 +61,8 @@ jobs: - run: >- echo '${{ github.event_name }}' - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Check for source changes id: check run: | diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 39a97392e898aa..3962d12403919a 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -22,12 +22,14 @@ jobs: env: branch_base: 'origin/${{ github.event.pull_request.base.ref }}' branch_pr: 'origin/${{ github.event.pull_request.head.ref }}' + commits: ${{ github.event.pull_request.commits }} refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}' refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}' steps: - name: 'Check out latest PR branch commit' uses: actions/checkout@v4 with: + persist-credentials: false ref: >- ${{ github.event_name == 'pull_request' @@ -39,15 +41,15 @@ jobs: if: github.event_name == 'pull_request' run: | # Fetch enough history to find a common ancestor commit (aka merge-base): - git fetch origin ${{ env.refspec_pr }} --depth=$(( ${{ github.event.pull_request.commits }} + 1 )) \ + git fetch origin "${refspec_pr}" --depth=$(( commits + 1 )) \ --no-tags --prune --no-recurse-submodules # This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from): - COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 ${{ env.branch_pr }} ) + COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 "${branch_pr}" ) DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" ) # Get all commits since that commit date from the base branch (eg: master or main): - git fetch origin ${{ env.refspec_base }} --shallow-since="${DATE}" \ + git fetch origin "${refspec_base}" --shallow-since="${DATE}" \ --no-tags --prune --no-recurse-submodules - name: 'Set up Python' uses: actions/setup-python@v5 @@ -69,7 +71,7 @@ jobs: if: github.event_name == 'pull_request' run: | python Doc/tools/check-warnings.py \ - --annotate-diff '${{ env.branch_base }}' '${{ env.branch_pr }}' \ + --annotate-diff "${branch_base}" "${branch_pr}" \ --fail-if-regression \ --fail-if-improved \ --fail-if-new-news-nit @@ -81,6 +83,8 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: 'Set up Python' uses: actions/setup-python@v5 with: @@ -99,6 +103,8 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/cache@v4 with: path: ~/.cache/pip diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 4c3dd10194f8cb..36ae3e27207e37 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -29,6 +29,8 @@ jobs: runs-on: ${{ inputs.os }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache diff --git a/.github/workflows/reusable-tsan.yml b/.github/workflows/reusable-tsan.yml index 7a4d81f0bdcad1..b5144ca3e9efc4 100644 --- a/.github/workflows/reusable-tsan.yml +++ b/.github/workflows/reusable-tsan.yml @@ -23,8 +23,13 @@ jobs: name: 'Thread sanitizer' runs-on: ubuntu-24.04 timeout-minutes: 60 + env: + OPTIONS: ${{ inputs.options }} + SUPPRESSIONS_PATH: ${{ inputs.suppressions_path }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Runner image version run: echo "IMAGE_VERSION=${ImageVersion}" >> "$GITHUB_ENV" - name: Restore config.cache @@ -47,7 +52,7 @@ jobs: sudo sysctl -w vm.mmap_rnd_bits=28 - name: TSAN Option Setup run: | - echo "TSAN_OPTIONS=log_path=${GITHUB_WORKSPACE}/tsan_log suppressions=${GITHUB_WORKSPACE}/${{ inputs.suppressions_path }} handle_segv=0" >> "$GITHUB_ENV" + echo "TSAN_OPTIONS=log_path=${GITHUB_WORKSPACE}/tsan_log suppressions=${GITHUB_WORKSPACE}/${SUPPRESSIONS_PATH} handle_segv=0" >> "$GITHUB_ENV" echo "CC=clang" >> "$GITHUB_ENV" echo "CXX=clang++" >> "$GITHUB_ENV" - name: Add ccache to PATH @@ -59,7 +64,7 @@ jobs: save: ${{ github.event_name == 'push' }} max-size: "200M" - name: Configure CPython - run: ${{ inputs.options }} + run: "${OPTIONS}" - name: Build CPython run: make -j4 - name: Display build info diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 2869202c7910c9..46c542940c8483 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -28,6 +28,8 @@ jobs: TERM: linux steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install dependencies @@ -94,7 +96,7 @@ jobs: if: ${{ !inputs.free-threading }} run: >- python Tools/build/check_warnings.py - --compiler-output-file-path=${{ env.CPYTHON_BUILDDIR }}/compiler_output_ubuntu.txt + --compiler-output-file-path="${CPYTHON_BUILDDIR}/compiler_output_ubuntu.txt" --warning-ignore-file-path "${GITHUB_WORKSPACE}/Tools/build/.warningignore_ubuntu" --compiler-output-type=gcc --fail-on-regression diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 3f96c888e2dd30..4356d9c1c8795e 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -20,6 +20,8 @@ jobs: CROSS_BUILD_WASI: cross-build/wasm32-wasip1 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false # No problem resolver registered as one doesn't currently exist for Clang. - name: "Install wasmtime" uses: bytecodealliance/actions/wasmtime/setup@v1 @@ -34,9 +36,9 @@ jobs: - name: "Install WASI SDK" # Hard-coded to x64. if: steps.cache-wasi-sdk.outputs.cache-hit != 'true' run: | - mkdir ${{ env.WASI_SDK_PATH }} && \ - curl -s -S --location https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${{ env.WASI_SDK_VERSION }}/wasi-sdk-${{ env.WASI_SDK_VERSION }}.0-x86_64-linux.tar.gz | \ - tar --strip-components 1 --directory ${{ env.WASI_SDK_PATH }} --extract --gunzip + mkdir "${WASI_SDK_PATH}" && \ + curl -s -S --location "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION}/wasi-sdk-${WASI_SDK_VERSION}.0-x86_64-linux.tar.gz" | \ + tar --strip-components 1 --directory "${WASI_SDK_PATH}" --extract --gunzip - name: "Configure ccache action" uses: hendrikmuhs/ccache-action@v1.2 with: @@ -72,6 +74,6 @@ jobs: - name: "Make host" run: python3 Tools/wasm/wasi.py make-host - name: "Display build info" - run: make --directory ${{ env.CROSS_BUILD_WASI }} pythoninfo + run: make --directory "${CROSS_BUILD_WASI}" pythoninfo - name: "Test" - run: make --directory ${{ env.CROSS_BUILD_WASI }} test + run: make --directory "${CROSS_BUILD_WASI}" test diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index abdb1a1982fef8..d0d53dba0b45d1 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -17,8 +17,11 @@ jobs: runs-on: windows-latest timeout-minutes: 60 env: + ARCH: ${{ inputs.arch }} IncludeFreethreaded: true steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Build CPython installer - run: .\Tools\msi\build.bat --doc -${{ inputs.arch }} + run: .\Tools\msi\build.bat --doc -"${ARCH}" diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index 12b68d68466d62..459d2b29e5d42b 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -26,8 +26,12 @@ jobs: name: 'build and test (${{ inputs.arch }})' runs-on: ${{ inputs.os }} timeout-minutes: 60 + env: + ARCH: ${{ inputs.arch }} steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - name: Register MSVC problem matcher if: inputs.arch != 'Win32' run: echo "::add-matcher::.github/problem-matchers/msvc.json" @@ -35,8 +39,9 @@ jobs: run: >- .\\PCbuild\\build.bat -e -d -v - -p ${{ inputs.arch }} + -p "${ARCH}" ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} + shell: bash - name: Display build info # FIXME(diegorusso): remove the `if` if: inputs.arch != 'arm64' run: .\\python.bat -m test.pythoninfo @@ -44,6 +49,7 @@ jobs: if: inputs.arch != 'arm64' run: >- .\\PCbuild\\rt.bat - -p ${{ inputs.arch }} + -p "${ARCH}" -d -q --fast-ci ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} + shell: bash diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index f97587e68cbbe4..7578189f5d4d67 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -4,14 +4,13 @@ on: schedule: - cron: "0 */6 * * *" -permissions: - pull-requests: write - jobs: stale: if: github.repository_owner == 'python' runs-on: ubuntu-latest + permissions: + pull-requests: write timeout-minutes: 10 steps: diff --git a/.github/workflows/verify-ensurepip-wheels.yml b/.github/workflows/verify-ensurepip-wheels.yml index 83b007f1c9c2ef..463e7bf3355cc3 100644 --- a/.github/workflows/verify-ensurepip-wheels.yml +++ b/.github/workflows/verify-ensurepip-wheels.yml @@ -26,6 +26,8 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: '3' diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 00000000000000..eeda8d9eaaf484 --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,6 @@ +# Configuration for the zizmor static analysis tool, run via pre-commit in CI +# https://woodruffw.github.io/zizmor/configuration/ +rules: + dangerous-triggers: + ignore: + - documentation-links.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccaf2390d99fae..107f3b255735f4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.8.2 hooks: - id: ruff name: Run Ruff (lint) on Doc/ @@ -51,7 +51,7 @@ repos: types_or: [c, inc, python, rst] - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.29.4 + rev: 0.30.0 hooks: - id: check-dependabot - id: check-github-workflows @@ -61,6 +61,11 @@ repos: hooks: - id: actionlint + - repo: https://github.com/woodruffw/zizmor-pre-commit + rev: v0.8.0 + hooks: + - id: zizmor + - repo: https://github.com/sphinx-contrib/sphinx-lint rev: v1.0.0 hooks: From 690fe077f6b1bf50e9d62078b22c334775efb3af Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 10 Dec 2024 11:53:56 +0100 Subject: [PATCH 39/55] gh-126491: Revert "GH-126491: Lower heap size limit with faster marking (GH-127519)" (GH-127770) Revert "GH-126491: Lower heap size limit with faster marking (GH-127519)" This reverts commit 023b7d2141467017abc27de864f3f44677768cb3, which introduced a refleak. --- InternalDocs/garbage_collector.md | 51 +---- Lib/test/test_gc.py | 14 +- Objects/dictobject.c | 4 +- Objects/genobject.c | 69 ++++++- Objects/typeobject.c | 13 -- Python/gc.c | 301 ++++++++++++++++-------------- 6 files changed, 243 insertions(+), 209 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 394e4ef075f55e..e4cb9e45c9e96a 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... def __init__(self, next_link=None): ... self.next_link = next_link -... +... >>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -515,45 +515,6 @@ increment. All objects directly referred to from those stack frames are added to the working set. Then the above algorithm is repeated, starting from step 2. -Determining how much work to do -------------------------------- - -We need to do a certain amount of work to ensure that garbage is collected, -but doing too much work slows down execution. - -To work out how much work we need to do, consider a heap with `L` live objects -and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects -at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and -we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. -For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which -`G1` garbage objects are created. - -The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, -assuming that the number of live objects remains roughly constant. -If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` -For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio. - -In other words, to keep the garbage fraction to 1/3 or less we need to visit -4 times as many objects as are newly created. - -We can do better than this though. Not all new objects will be garbage. -Consider the heap at the end of the scavenge with `L1` live objects and `G1` -garbage. Also, note that `T == M + I` where `M` is the number of objects marked -as reachable and `I` is the number of objects visited in increments. -Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. - -If we choose the amount of work done such that `2*M + I == 6N` then we can do -less work in most cases, but are still guaranteed to keep up. -Since `I ≳ G0 + G1` (not strictly true, but close enough) -`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up. - -The reason that this improves performance is that `M` is usually much larger -than `I`. If `M == 10I`, then `T ≅ 3N`. - -Finally, instead of using a fixed multiple of 8, we gradually increase it as the -heap grows. This avoids wasting work for small heaps and during startup. - - Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index baf8e95dffdfce..b5140057a69d36 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1161,19 +1161,27 @@ def make_ll(depth): return head head = make_ll(1000) + count = 1000 + + # There will be some objects we aren't counting, + # e.g. the gc stats dicts. This test checks + # that the counts don't grow, so we try to + # correct for the uncounted objects + # This is just an estimate. + CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - iterations = max(20_000, initial_heap_size) - for i in range(iterations): + for i in range(20_000): newhead = make_ll(20) + count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index de518b8dc5024b..1c9f86438dadc3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7064,7 +7064,9 @@ int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyTypeObject *tp = Py_TYPE(obj); - assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); + if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { + return 0; + } if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); if (values->valid) { diff --git a/Objects/genobject.c b/Objects/genobject.c index 33679afecb420f..e87f199c2504ba 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -882,7 +882,25 @@ PyTypeObject PyGen_Type = { gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ - .tp_finalize = _PyGen_Finalize, + 0, /* tp_base */ + 0, /* tp_dict */ + + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + _PyGen_Finalize, /* tp_finalize */ }; static PyObject * @@ -1224,7 +1242,24 @@ PyTypeObject PyCoro_Type = { coro_methods, /* tp_methods */ coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ - .tp_finalize = _PyGen_Finalize, + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + _PyGen_Finalize, /* tp_finalize */ }; static void @@ -1429,6 +1464,7 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) + static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1637,7 +1673,24 @@ PyTypeObject PyAsyncGen_Type = { async_gen_methods, /* tp_methods */ async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ - .tp_finalize = _PyGen_Finalize, + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + _PyGen_Finalize, /* tp_finalize */ }; @@ -1882,6 +1935,16 @@ PyTypeObject _PyAsyncGenASend_Type = { PyObject_SelfIter, /* tp_iter */ async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ .tp_finalize = async_gen_asend_finalize, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index cc95b9857e3f2d..2068d6aa9be52b 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2355,16 +2355,6 @@ subtype_traverse(PyObject *self, visitproc visit, void *arg) return 0; } - -static int -plain_object_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyTypeObject *type = Py_TYPE(self); - assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); - Py_VISIT(type); - return PyObject_VisitManagedDict(self, visit, arg); -} - static void clear_slots(PyTypeObject *type, PyObject *self) { @@ -4157,9 +4147,6 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; - if (type->tp_basicsize == sizeof(PyObject)) { - type->tp_traverse = plain_object_traverse; - } } type->tp_basicsize = slotoffset; diff --git a/Python/gc.c b/Python/gc.c index fd29a48518e71b..5b9588c8741b97 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1277,13 +1277,18 @@ gc_list_set_space(PyGC_Head *list, int space) * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of - * new objects (N) to the heap, and incremental collectors - * scan I objects from the old space. - * I > N must be true. We also want I > N * K to be where - * K > 2. Higher values of K mean that the old space is + * objects, S (for survivors) to the old space, and + * incremental collectors scan I objects from the old space. + * I > S must be true. We also want I > S * N to be where + * N > 1. Higher values of N mean that the old space is * scanned more rapidly. + * The default incremental threshold of 10 translates to + * N == 1.4 (1 + 4/threshold) */ -#define SCAN_RATE_DIVISOR 5 + +/* Divide by 10, so that the default incremental threshold of 10 + * scans objects at 1% of the heap size */ +#define SCAN_RATE_DIVISOR 10 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1325,76 +1330,69 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -typedef struct work_stack { - PyGC_Head *top; - int visited_space; -} WorkStack; - -/* Remove gc from the list it is currently in and push it to the stack */ -static inline void -push_to_stack(PyGC_Head *gc, WorkStack *stack) -{ - PyGC_Head *prev = GC_PREV(gc); - PyGC_Head *next = GC_NEXT(gc); - _PyGCHead_SET_NEXT(prev, next); - _PyGCHead_SET_PREV(next, prev); - _PyGCHead_SET_PREV(gc, stack->top); - stack->top = gc; -} - -static inline PyGC_Head * -pop_from_stack(WorkStack *stack) +#ifndef NDEBUG +static inline int +IS_IN_VISITED(PyGC_Head *gc, int visited_space) { - PyGC_Head *gc = stack->top; - stack->top = _PyGCHead_PREV(gc); - return gc; + assert(visited_space == 0 || other_space(visited_space) == 0); + return gc_old_space(gc) == visited_space; } +#endif -/* append list `from` to `stack`; `from` becomes an empty list */ -static void -move_list_to_stack(PyGC_Head *from, WorkStack *stack) -{ - if (!gc_list_is_empty(from)) { - PyGC_Head *from_head = GC_NEXT(from); - PyGC_Head *from_tail = GC_PREV(from); - _PyGCHead_SET_PREV(from_head, stack->top); - stack->top = from_tail; - gc_list_init(from); - } -} +struct container_and_flag { + PyGC_Head *container; + int visited_space; + intptr_t size; +}; -static inline void -move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +/* A traversal callback for adding to container) */ +static int +visit_add_to_container(PyObject *op, void *arg) { - assert(op != NULL); - if (_PyObject_IS_GC(op)) { + OBJECT_STAT_INC(object_visits); + struct container_and_flag *cf = (struct container_and_flag *)arg; + int visited = cf->visited_space; + assert(visited == get_gc_state()->visited_space); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - assert(!_Py_IsImmortal(op)); + gc_old_space(gc) != visited) { gc_flip_old_space(gc); - push_to_stack(gc, stack); + gc_list_move(gc, cf->container); + cf->size++; } } + return 0; } -static void -move_unvisited(PyObject *op, WorkStack *stack, int visited_space) -{ - move_to_stack(op, stack, visited_space); -} - -#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) - -/* A traversal callback for adding to container */ -static int -visit_add_to_container(PyObject *op, void *arg) -{ - OBJECT_STAT_INC(object_visits); - WorkStack *stack = (WorkStack *)arg; - assert(stack->visited_space == get_gc_state()->visited_space); - move_to_stack(op, stack, stack->visited_space); - return 0; +static intptr_t +expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) +{ + struct container_and_flag arg = { + .container = container, + .visited_space = gcstate->visited_space, + .size = 0 + }; + assert(GC_NEXT(gc) == container); + while (gc != container) { + /* Survivors will be moved to visited space, so they should + * have been marked as visited */ + assert(IS_IN_VISITED(gc, gcstate->visited_space)); + PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); + if (_Py_IsImmortal(op)) { + PyGC_Head *next = GC_NEXT(gc); + gc_list_move(gc, &get_gc_state()->permanent_generation.head); + gc = next; + continue; + } + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + gc = GC_NEXT(gc); + } + return arg.size; } /* Do bookkeeping for a completed GC cycle */ @@ -1422,62 +1420,54 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } assert(gc_list_is_empty(&gcstate->old[visited].head)); + gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static void -frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) -{ - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - if (frame->f_locals != NULL) { - move_unvisited(frame->f_locals, stack, visited_space); - } - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - move_unvisited(func, stack, visited_space); - while (sp > locals) { - sp--; - _PyStackRef ref = *sp; - if (!PyStackRef_IsNull(ref)) { - PyObject *op = PyStackRef_AsPyObjectBorrow(ref); - if (!_Py_IsImmortal(op)) { - move_unvisited(op, stack, visited_space); - } +static intptr_t +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; } } + return 0; } -static Py_ssize_t -move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) +static intptr_t +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - Py_ssize_t objects_marked = 0; - while (stack->top != NULL) { - PyGC_Head *gc = pop_from_stack(stack); + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); assert(gc_old_space(gc) == visited_space); - gc_list_append(gc, visited); - objects_marked++; + gc_list_move(gc, visited); PyObject *op = FROM_GC(gc); - assert(PyObject_IS_GC(op)); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - _PyObject_GC_UNTRACK(op); - } - else { - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, visit_add_to_container, stack); - } + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); } gc_list_validate_space(visited, visited_space); - return objects_marked; + return arg.size; } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - WorkStack stack; - stack.top = NULL; - stack.visited_space = visited_space; + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1490,7 +1480,27 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - frame_move_unvisited(frame, &stack, visited_space); + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1503,31 +1513,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); - assert(stack.top == NULL); + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - WorkStack stack; - stack.top = NULL; - stack.visited_space = visited_space; - MOVE_UNVISITED(interp->sysdict, &stack, visited_space); - MOVE_UNVISITED(interp->builtins, &stack, visited_space); - MOVE_UNVISITED(interp->dict, &stack, visited_space); + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); - MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); - MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); } - Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); - assert(stack.top == NULL); + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); return objects_marked; } @@ -1539,35 +1549,39 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; validate_spaces(gcstate); return objects_marked; } - -/* See InternalDocs/garbage_collector.md for more details. */ -#define MAX_HEAP_PORTION_MULTIPLIER 5 -#define MARKING_PROGRESS_MULTIPLIER 2 - static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on two things. + /* The amount of work we want to do depends on three things. * 1. The number of new objects created - * 2. The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) + * 2. The growth in heap size since the last collection + * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. */ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; - intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_portion > max_heap_portion) { - heap_portion = max_heap_portion; + intptr_t max_heap_fraction = new_objects*3/2; + intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > max_heap_fraction) { + heap_fraction = max_heap_fraction; } gcstate->young.count = 0; - return new_objects + heap_portion; + return new_objects + heap_fraction; } static void @@ -1580,37 +1594,36 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; + gcstate->work_to_do -= objects_marked; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + PyGC_Head increment; + gc_list_init(&increment); + int scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; + gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - PyGC_Head increment; - gc_list_init(&increment); - WorkStack working; - working.top = 0; - working.visited_space = gcstate->visited_space; - move_list_to_stack(&gcstate->young.head, &working); - Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + gc_list_merge(&gcstate->young.head, &increment); gc_list_validate_space(&increment, gcstate->visited_space); - assert(working.top == NULL); + Py_ssize_t increment_size = gc_list_size(&increment); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_set_old_space(gc, gcstate->visited_space); - push_to_stack(gc, &working); + gc_list_move(gc, &increment); + increment_size++; assert(!_Py_IsImmortal(FROM_GC(gc))); - increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); - assert(working.top == NULL); + gc_set_old_space(gc, gcstate->visited_space); + increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } - assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1619,6 +1632,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1654,7 +1668,6 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_scavenge(gcstate); - gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From f4b31edf2d9d72878dab1f66a36913b5bcc848ec Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 10 Dec 2024 11:56:24 +0100 Subject: [PATCH 40/55] gh-127257: ssl: Raise OSError for ERR_LIB_SYS (GH-127361) From the ERR_raise manpage: ERR_LIB_SYS This "library code" indicates that a system error is being reported. In this case, the reason code given to `ERR_raise()` and `ERR_raise_data()` *must* be `errno(3)`. This PR only handles ERR_LIB_SYS for the high-lever error types SSL_ERROR_SYSCALL and SSL_ERROR_SSL, i.e., not the ones where OpenSSL indicates it has some more information about the issue. --- .../2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst | 2 ++ Modules/_ssl.c | 10 ++++++++++ 2 files changed, 12 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst diff --git a/Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst b/Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst new file mode 100644 index 00000000000000..fb0380cba0b607 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-28-14-14-46.gh-issue-127257.n6-jU9.rst @@ -0,0 +1,2 @@ +In :mod:`ssl`, system call failures that OpenSSL reports using +``ERR_LIB_SYS`` are now raised as :exc:`OSError`. diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 59c414f9ce1ceb..e7df132869fee6 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -667,6 +667,11 @@ PySSL_SetError(PySSLSocket *sslsock, const char *filename, int lineno) ERR_GET_REASON(e) == SSL_R_CERTIFICATE_VERIFY_FAILED) { type = state->PySSLCertVerificationErrorObject; } + if (ERR_GET_LIB(e) == ERR_LIB_SYS) { + // A system error is being reported; reason is set to errno + errno = ERR_GET_REASON(e); + return PyErr_SetFromErrno(PyExc_OSError); + } p = PY_SSL_ERROR_SYSCALL; } break; @@ -692,6 +697,11 @@ PySSL_SetError(PySSLSocket *sslsock, const char *filename, int lineno) errstr = "EOF occurred in violation of protocol"; } #endif + if (ERR_GET_LIB(e) == ERR_LIB_SYS) { + // A system error is being reported; reason is set to errno + errno = ERR_GET_REASON(e); + return PyErr_SetFromErrno(PyExc_OSError); + } break; } default: From cef0a90d8f3a94aa534593f39b4abf98165675b9 Mon Sep 17 00:00:00 2001 From: Melissa0x1f992 <70096546+Melissa0x1f992@users.noreply.github.com> Date: Tue, 10 Dec 2024 06:13:11 -0600 Subject: [PATCH 41/55] gh-126937: ctypes: fix TypeError when a field's size is >65535 bytes (GH-126938) Co-authored-by: Peter Bierma Co-authored-by: Terry Jan Reedy Co-authored-by: Petr Viktorin --- Lib/test/test_ctypes/test_struct_fields.py | 23 +++++++++++++++++++ ...-11-17-21-35-55.gh-issue-126937.qluVM0.rst | 3 +++ Modules/_ctypes/cfield.c | 10 ++++++-- Modules/_ctypes/stgdict.c | 4 ++-- 4 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst diff --git a/Lib/test/test_ctypes/test_struct_fields.py b/Lib/test/test_ctypes/test_struct_fields.py index b5e165f3bae929..1b3e64efd410b8 100644 --- a/Lib/test/test_ctypes/test_struct_fields.py +++ b/Lib/test/test_ctypes/test_struct_fields.py @@ -1,4 +1,5 @@ import unittest +import sys from ctypes import Structure, Union, sizeof, c_char, c_int from ._support import (CField, Py_TPFLAGS_DISALLOW_INSTANTIATION, Py_TPFLAGS_IMMUTABLETYPE) @@ -75,6 +76,28 @@ def __init_subclass__(cls, **kwargs): 'ctypes state is not initialized'): class Subclass(BrokenStructure): ... + def test_max_field_size_gh126937(self): + # Classes for big structs should be created successfully. + # (But they most likely can't be instantiated.) + # Here we test the exact limit: the number of *bits* must fit + # in Py_ssize_t. + + class X(self.cls): + _fields_ = [('char', c_char),] + max_field_size = sys.maxsize // 8 + + class Y(self.cls): + _fields_ = [('largeField', X * max_field_size)] + class Z(self.cls): + _fields_ = [('largeField', c_char * max_field_size)] + + with self.assertRaises(ValueError): + class TooBig(self.cls): + _fields_ = [('largeField', X * (max_field_size + 1))] + with self.assertRaises(ValueError): + class TooBig(self.cls): + _fields_ = [('largeField', c_char * (max_field_size + 1))] + # __set__ and __get__ should raise a TypeError in case their self # argument is not a ctype instance. def test___set__(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst new file mode 100644 index 00000000000000..8d7da0d4107021 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-17-21-35-55.gh-issue-126937.qluVM0.rst @@ -0,0 +1,3 @@ +Fix :exc:`TypeError` when a :class:`ctypes.Structure` has a field size +that doesn't fit into an unsigned 16-bit integer. +Instead, the maximum number of *bits* is :data:`sys.maxsize`. diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 3220852c8398e0..2b9e8a1a10d6f5 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -110,10 +110,16 @@ PyCField_new_impl(PyTypeObject *type, PyObject *name, PyObject *proto, goto error; } - Py_ssize_t bit_size = NUM_BITS(size); - if (bit_size) { + if (bit_size_obj != Py_None) { +#ifdef Py_DEBUG + Py_ssize_t bit_size = NUM_BITS(size); assert(bit_size > 0); assert(bit_size <= info->size * 8); + // Currently, the bit size is specified redundantly + // in NUM_BITS(size) and bit_size_obj. + // Verify that they match. + assert(PyLong_AsSsize_t(bit_size_obj) == bit_size); +#endif switch(info->ffi_type_pointer.type) { case FFI_TYPE_UINT8: case FFI_TYPE_UINT16: diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index 5dbbe0b3285d58..5ca5b62427600d 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -292,7 +292,7 @@ PyCStructUnionType_update_stginfo(PyObject *type, PyObject *fields, int isStruct if (!tmp) { goto error; } - Py_ssize_t total_align = PyLong_AsInt(tmp); + Py_ssize_t total_align = PyLong_AsSsize_t(tmp); Py_DECREF(tmp); if (total_align < 0) { if (!PyErr_Occurred()) { @@ -306,7 +306,7 @@ PyCStructUnionType_update_stginfo(PyObject *type, PyObject *fields, int isStruct if (!tmp) { goto error; } - Py_ssize_t total_size = PyLong_AsInt(tmp); + Py_ssize_t total_size = PyLong_AsSsize_t(tmp); Py_DECREF(tmp); if (total_size < 0) { if (!PyErr_Occurred()) { From 9af96f440618304e7cc609c246e1f8c8b2d7a119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:58:17 +0100 Subject: [PATCH 42/55] gh-127563: use `dk_log2_index_bytes=3` in empty dicts (GH-127568) This fixes a UBSan failure (unaligned zero-size memcpy) in `dictobject.c`. --- Objects/dictobject.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 1c9f86438dadc3..05c93a3e448181 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -588,11 +588,14 @@ estimate_log2_keysize(Py_ssize_t n) /* This immutable, empty PyDictKeysObject is used for PyDict_Clear() * (which cannot fail and thus can do no allocation). + * + * See https://github.com/python/cpython/pull/127568#discussion_r1868070614 + * for the rationale of using dk_log2_index_bytes=3 instead of 0. */ static PyDictKeysObject empty_keys_struct = { _Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */ 0, /* dk_log2_size */ - 0, /* dk_log2_index_bytes */ + 3, /* dk_log2_index_bytes */ DICT_KEYS_UNICODE, /* dk_kind */ #ifdef Py_GIL_DISABLED {0}, /* dk_mutex */ From c91ccbe4ac0ec15c503521f539b3528db85871b4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 10 Dec 2024 17:33:11 +0100 Subject: [PATCH 43/55] gh-59705: Set OS thread name when Thread.name is changed (#127702) Co-authored-by: Petr Viktorin --- Doc/library/threading.rst | 12 ++++++++++++ Lib/test/test_threading.py | 19 +++++++++++++++++++ Lib/threading.py | 16 +++++++++++----- Modules/_threadmodule.c | 3 +-- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index d4b343db36efb3..f183f3f535c4cb 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -434,6 +434,18 @@ since it is impossible to detect the termination of alien threads. Multiple threads may be given the same name. The initial name is set by the constructor. + On some platforms, the thread name is set at the operating system level + when the thread starts, so that it is visible in task managers. + This name may be truncated to fit in a system-specific limit (for example, + 15 bytes on Linux or 63 bytes on macOS). + + Changes to *name* are only reflected at the OS level when the currently + running thread is renamed. (Setting the *name* attribute of a + different thread only updates the Python Thread object.) + + .. versionchanged:: 3.14 + Set the operating system thread name. + .. method:: getName() setName() diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index d05161f46f1034..3e164a12581dd1 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2164,6 +2164,25 @@ def work(): self.assertEqual(work_name, expected, f"{len(work_name)=} and {len(expected)=}") + @unittest.skipUnless(hasattr(_thread, 'set_name'), "missing _thread.set_name") + @unittest.skipUnless(hasattr(_thread, '_get_name'), "missing _thread._get_name") + def test_change_name(self): + # Change the name of a thread while the thread is running + + name1 = None + name2 = None + def work(): + nonlocal name1, name2 + name1 = _thread._get_name() + threading.current_thread().name = "new name" + name2 = _thread._get_name() + + thread = threading.Thread(target=work, name="name") + thread.start() + thread.join() + self.assertEqual(name1, "name") + self.assertEqual(name2, "new name") + class InterruptMainTests(unittest.TestCase): def check_interrupt_main_with_signal_handler(self, signum): diff --git a/Lib/threading.py b/Lib/threading.py index 3abd22a2aa1b72..78e591124278fc 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -1026,16 +1026,20 @@ def _set_ident(self): def _set_native_id(self): self._native_id = get_native_id() + def _set_os_name(self): + if _set_name is None or not self._name: + return + try: + _set_name(self._name) + except OSError: + pass + def _bootstrap_inner(self): try: self._set_ident() if _HAVE_THREAD_NATIVE_ID: self._set_native_id() - if _set_name is not None and self._name: - try: - _set_name(self._name) - except OSError: - pass + self._set_os_name() self._started.set() with _active_limbo_lock: _active[self._ident] = self @@ -1115,6 +1119,8 @@ def name(self): def name(self, name): assert self._initialized, "Thread.__init__() not called" self._name = str(name) + if get_ident() == self._ident: + self._set_os_name() @property def ident(self): diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 35c032fbeaa94f..75b34a8df7622c 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -2423,8 +2423,7 @@ _thread_set_name_impl(PyObject *module, PyObject *name_obj) #ifdef PYTHREAD_NAME_MAXLEN // Truncate to PYTHREAD_NAME_MAXLEN bytes + the NUL byte if needed - size_t len = PyBytes_GET_SIZE(name_encoded); - if (len > PYTHREAD_NAME_MAXLEN) { + if (PyBytes_GET_SIZE(name_encoded) > PYTHREAD_NAME_MAXLEN) { PyObject *truncated; truncated = PyBytes_FromStringAndSize(PyBytes_AS_STRING(name_encoded), PYTHREAD_NAME_MAXLEN); From 035f512046337e64a018d11fdaa3b21758625291 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Wed, 11 Dec 2024 02:35:00 +0900 Subject: [PATCH 44/55] Docs: Fix indents in `xmlrpc.client.rst` (#127782) --- Doc/library/xmlrpc.client.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index c57f433e6efd98..971e65605841e7 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -64,11 +64,11 @@ between conformable Python objects and XML on the wire. The obsolete *use_datetime* flag is similar to *use_builtin_types* but it applies only to date/time values. -.. versionchanged:: 3.3 - The *use_builtin_types* flag was added. + .. versionchanged:: 3.3 + The *use_builtin_types* flag was added. -.. versionchanged:: 3.8 - The *headers* parameter was added. + .. versionchanged:: 3.8 + The *headers* parameter was added. Both the HTTP and HTTPS transports support the URL syntax extension for HTTP Basic Authentication: ``http://user:pass@host:port/path``. The ``user:pass`` From 51216857ca8283f5b41c8cf9874238da56da4968 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Wed, 11 Dec 2024 05:32:04 +0800 Subject: [PATCH 45/55] gh-126821: Add versionadded annotation to use_system_logger feature. (#127755) Add versionadded annotation to use_system_logger feature. --- Doc/c-api/init_config.rst | 2 ++ iOS/README.rst | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 7497bf241fb10e..6b33d93a9f2af9 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -1290,6 +1290,8 @@ PyConfig Default: ``0`` (don't use system log). + .. versionadded:: 3.13.2 + .. c:member:: int user_site_directory If non-zero, add the user site directory to :data:`sys.path`. diff --git a/iOS/README.rst b/iOS/README.rst index 9cea98cf1abbfa..13b885144932e4 100644 --- a/iOS/README.rst +++ b/iOS/README.rst @@ -286,7 +286,7 @@ This will: * Run the test suite on an "iPhone SE (3rd generation)" simulator. On success, the test suite will exit and report successful completion of the -test suite. On a 2022 M1 MacBook Pro, the test suite takes approximately 12 +test suite. On a 2022 M1 MacBook Pro, the test suite takes approximately 15 minutes to run; a couple of extra minutes is required to compile the testbed project, and then boot and prepare the iOS simulator. From 12b4f1a5a175d4dcec27631fce2883038f0917ae Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 11 Dec 2024 00:09:55 +0000 Subject: [PATCH 46/55] GH-127381: pathlib ABCs: remove `PathBase.samefile()` and rarer `is_*()` (#127709) Remove `PathBase.samefile()`, which is fairly specific to the local FS, and relies on `stat()`, which we're aiming to remove from `PathBase`. Also remove `PathBase.is_mount()`, `is_junction()`, `is_block_device()`, `is_char_device()`, `is_fifo()` and `is_socket()`. These rely on POSIX file type numbers that we're aiming to remove from the `PathBase` API. --- Lib/pathlib/_abc.py | 89 +---------------------- Lib/pathlib/_local.py | 65 ++++++++++++++++- Lib/test/test_pathlib/test_pathlib.py | 77 +++++++++++++++++++- Lib/test/test_pathlib/test_pathlib_abc.py | 81 --------------------- 4 files changed, 141 insertions(+), 171 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f68685f21d6d79..02c6e0500617aa 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -16,7 +16,7 @@ import posixpath from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks -from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +from stat import S_ISDIR, S_ISLNK, S_ISREG from pathlib._os import copyfileobj @@ -408,26 +408,6 @@ def is_file(self, *, follow_symlinks=True): except (OSError, ValueError): return False - def is_mount(self): - """ - Check if this path is a mount point - """ - # Need to exist and be a dir - if not self.exists() or not self.is_dir(): - return False - - try: - parent_dev = self.parent.stat().st_dev - except OSError: - return False - - dev = self.stat().st_dev - if dev != parent_dev: - return True - ino = self.stat().st_ino - parent_ino = self.parent.stat().st_ino - return ino == parent_ino - def is_symlink(self): """ Whether this path is a symbolic link. @@ -437,76 +417,11 @@ def is_symlink(self): except (OSError, ValueError): return False - def is_junction(self): - """ - Whether this path is a junction. - """ - # Junctions are a Windows-only feature, not present in POSIX nor the - # majority of virtual filesystems. There is no cross-platform idiom - # to check for junctions (using stat().st_mode). - return False - - def is_block_device(self): - """ - Whether this path is a block device. - """ - try: - return S_ISBLK(self.stat().st_mode) - except (OSError, ValueError): - return False - - def is_char_device(self): - """ - Whether this path is a character device. - """ - try: - return S_ISCHR(self.stat().st_mode) - except (OSError, ValueError): - return False - - def is_fifo(self): - """ - Whether this path is a FIFO. - """ - try: - return S_ISFIFO(self.stat().st_mode) - except (OSError, ValueError): - return False - - def is_socket(self): - """ - Whether this path is a socket. - """ - try: - return S_ISSOCK(self.stat().st_mode) - except (OSError, ValueError): - return False - - def samefile(self, other_path): - """Return whether other_path is the same or not as this file - (as returned by os.path.samefile()). - """ - st = self.stat() - try: - other_st = other_path.stat() - except AttributeError: - other_st = self.with_segments(other_path).stat() - return (st.st_ino == other_st.st_ino and - st.st_dev == other_st.st_dev) - def _ensure_different_file(self, other_path): """ Raise OSError(EINVAL) if both paths refer to the same file. """ - try: - if not self.samefile(other_path): - return - except (OSError, ValueError): - return - err = OSError(EINVAL, "Source and target are the same file") - err.filename = str(self) - err.filename2 = str(other_path) - raise err + pass def _ensure_distinct_path(self, other_path): """ diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index f87069ce70a2de..85437ec80bfcc4 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -4,9 +4,10 @@ import os import posixpath import sys -from errno import EXDEV +from errno import EINVAL, EXDEV from glob import _StringGlobber from itertools import chain +from stat import S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from _collections_abc import Sequence try: @@ -596,6 +597,68 @@ def is_junction(self): """ return os.path.isjunction(self) + def is_block_device(self): + """ + Whether this path is a block device. + """ + try: + return S_ISBLK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_char_device(self): + """ + Whether this path is a character device. + """ + try: + return S_ISCHR(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_fifo(self): + """ + Whether this path is a FIFO. + """ + try: + return S_ISFIFO(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_socket(self): + """ + Whether this path is a socket. + """ + try: + return S_ISSOCK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def samefile(self, other_path): + """Return whether other_path is the same or not as this file + (as returned by os.path.samefile()). + """ + st = self.stat() + try: + other_st = other_path.stat() + except AttributeError: + other_st = self.with_segments(other_path).stat() + return (st.st_ino == other_st.st_ino and + st.st_dev == other_st.st_dev) + + def _ensure_different_file(self, other_path): + """ + Raise OSError(EINVAL) if both paths refer to the same file. + """ + try: + if not self.samefile(other_path): + return + except (OSError, ValueError): + return + err = OSError(EINVAL, "Source and target are the same file") + err.filename = str(self) + err.filename2 = str(other_path) + raise err + def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index ce0f4748c860b1..b57ef420bfcbcd 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1786,13 +1786,31 @@ def test_lstat_nosymlink(self): st = p.stat() self.assertEqual(st, p.lstat()) - def test_is_junction(self): + def test_is_junction_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_junction()) + self.assertFalse((P / 'dirA').is_junction()) + self.assertFalse((P / 'non-existing').is_junction()) + self.assertFalse((P / 'fileA' / 'bah').is_junction()) + self.assertFalse((P / 'fileA\udfff').is_junction()) + self.assertFalse((P / 'fileA\x00').is_junction()) + + def test_is_junction_true(self): P = self.cls(self.base) with mock.patch.object(P.parser, 'isjunction'): self.assertEqual(P.is_junction(), P.parser.isjunction.return_value) P.parser.isjunction.assert_called_once_with(P) + def test_is_fifo_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_fifo()) + self.assertFalse((P / 'dirA').is_fifo()) + self.assertFalse((P / 'non-existing').is_fifo()) + self.assertFalse((P / 'fileA' / 'bah').is_fifo()) + self.assertIs((P / 'fileA\udfff').is_fifo(), False) + self.assertIs((P / 'fileA\x00').is_fifo(), False) + @unittest.skipUnless(hasattr(os, "mkfifo"), "os.mkfifo() required") @unittest.skipIf(sys.platform == "vxworks", "fifo requires special path on VxWorks") @@ -1808,6 +1826,15 @@ def test_is_fifo_true(self): self.assertIs(self.cls(self.base, 'myfifo\udfff').is_fifo(), False) self.assertIs(self.cls(self.base, 'myfifo\x00').is_fifo(), False) + def test_is_socket_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_socket()) + self.assertFalse((P / 'dirA').is_socket()) + self.assertFalse((P / 'non-existing').is_socket()) + self.assertFalse((P / 'fileA' / 'bah').is_socket()) + self.assertIs((P / 'fileA\udfff').is_socket(), False) + self.assertIs((P / 'fileA\x00').is_socket(), False) + @unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required") @unittest.skipIf( is_emscripten, "Unix sockets are not implemented on Emscripten." @@ -1831,6 +1858,24 @@ def test_is_socket_true(self): self.assertIs(self.cls(self.base, 'mysock\udfff').is_socket(), False) self.assertIs(self.cls(self.base, 'mysock\x00').is_socket(), False) + def test_is_block_device_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_block_device()) + self.assertFalse((P / 'dirA').is_block_device()) + self.assertFalse((P / 'non-existing').is_block_device()) + self.assertFalse((P / 'fileA' / 'bah').is_block_device()) + self.assertIs((P / 'fileA\udfff').is_block_device(), False) + self.assertIs((P / 'fileA\x00').is_block_device(), False) + + def test_is_char_device_false(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_char_device()) + self.assertFalse((P / 'dirA').is_char_device()) + self.assertFalse((P / 'non-existing').is_char_device()) + self.assertFalse((P / 'fileA' / 'bah').is_char_device()) + self.assertIs((P / 'fileA\udfff').is_char_device(), False) + self.assertIs((P / 'fileA\x00').is_char_device(), False) + def test_is_char_device_true(self): # os.devnull should generally be a char device. P = self.cls(os.devnull) @@ -1842,7 +1887,14 @@ def test_is_char_device_true(self): self.assertIs(self.cls(f'{os.devnull}\udfff').is_char_device(), False) self.assertIs(self.cls(f'{os.devnull}\x00').is_char_device(), False) - def test_is_mount_root(self): + def test_is_mount(self): + P = self.cls(self.base) + self.assertFalse((P / 'fileA').is_mount()) + self.assertFalse((P / 'dirA').is_mount()) + self.assertFalse((P / 'non-existing').is_mount()) + self.assertFalse((P / 'fileA' / 'bah').is_mount()) + if self.can_symlink: + self.assertFalse((P / 'linkA').is_mount()) if os.name == 'nt': R = self.cls('c:\\') else: @@ -1850,6 +1902,27 @@ def test_is_mount_root(self): self.assertTrue(R.is_mount()) self.assertFalse((R / '\udfff').is_mount()) + def test_samefile(self): + parser = self.parser + fileA_path = parser.join(self.base, 'fileA') + fileB_path = parser.join(self.base, 'dirB', 'fileB') + p = self.cls(fileA_path) + pp = self.cls(fileA_path) + q = self.cls(fileB_path) + self.assertTrue(p.samefile(fileA_path)) + self.assertTrue(p.samefile(pp)) + self.assertFalse(p.samefile(fileB_path)) + self.assertFalse(p.samefile(q)) + # Test the non-existent file case + non_existent = parser.join(self.base, 'foo') + r = self.cls(non_existent) + self.assertRaises(FileNotFoundError, p.samefile, r) + self.assertRaises(FileNotFoundError, p.samefile, non_existent) + self.assertRaises(FileNotFoundError, r.samefile, p) + self.assertRaises(FileNotFoundError, r.samefile, non_existent) + self.assertRaises(FileNotFoundError, r.samefile, r) + self.assertRaises(FileNotFoundError, r.samefile, non_existent) + def test_passing_kwargs_errors(self): with self.assertRaises(TypeError): self.cls(foo="bar") diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index dd9425ce393623..f4c364c6fe5109 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1300,15 +1300,9 @@ def test_unsupported_operation(self): e = UnsupportedOperation self.assertRaises(e, p.stat) self.assertRaises(e, p.exists) - self.assertRaises(e, p.samefile, 'foo') self.assertRaises(e, p.is_dir) self.assertRaises(e, p.is_file) - self.assertRaises(e, p.is_mount) self.assertRaises(e, p.is_symlink) - self.assertRaises(e, p.is_block_device) - self.assertRaises(e, p.is_char_device) - self.assertRaises(e, p.is_fifo) - self.assertRaises(e, p.is_socket) self.assertRaises(e, p.open) self.assertRaises(e, p.read_bytes) self.assertRaises(e, p.read_text) @@ -1535,27 +1529,6 @@ def assertEqualNormCase(self, path_a, path_b): normcase = self.parser.normcase self.assertEqual(normcase(path_a), normcase(path_b)) - def test_samefile(self): - parser = self.parser - fileA_path = parser.join(self.base, 'fileA') - fileB_path = parser.join(self.base, 'dirB', 'fileB') - p = self.cls(fileA_path) - pp = self.cls(fileA_path) - q = self.cls(fileB_path) - self.assertTrue(p.samefile(fileA_path)) - self.assertTrue(p.samefile(pp)) - self.assertFalse(p.samefile(fileB_path)) - self.assertFalse(p.samefile(q)) - # Test the non-existent file case - non_existent = parser.join(self.base, 'foo') - r = self.cls(non_existent) - self.assertRaises(FileNotFoundError, p.samefile, r) - self.assertRaises(FileNotFoundError, p.samefile, non_existent) - self.assertRaises(FileNotFoundError, r.samefile, p) - self.assertRaises(FileNotFoundError, r.samefile, non_existent) - self.assertRaises(FileNotFoundError, r.samefile, r) - self.assertRaises(FileNotFoundError, r.samefile, non_existent) - def test_exists(self): P = self.cls p = P(self.base) @@ -2115,15 +2088,6 @@ def test_is_file_no_follow_symlinks(self): self.assertFalse((P / 'fileA\udfff').is_file(follow_symlinks=False)) self.assertFalse((P / 'fileA\x00').is_file(follow_symlinks=False)) - def test_is_mount(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_mount()) - self.assertFalse((P / 'dirA').is_mount()) - self.assertFalse((P / 'non-existing').is_mount()) - self.assertFalse((P / 'fileA' / 'bah').is_mount()) - if self.can_symlink: - self.assertFalse((P / 'linkA').is_mount()) - def test_is_symlink(self): P = self.cls(self.base) self.assertFalse((P / 'fileA').is_symlink()) @@ -2140,51 +2104,6 @@ def test_is_symlink(self): self.assertIs((P / 'linkA\udfff').is_file(), False) self.assertIs((P / 'linkA\x00').is_file(), False) - def test_is_junction_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_junction()) - self.assertFalse((P / 'dirA').is_junction()) - self.assertFalse((P / 'non-existing').is_junction()) - self.assertFalse((P / 'fileA' / 'bah').is_junction()) - self.assertFalse((P / 'fileA\udfff').is_junction()) - self.assertFalse((P / 'fileA\x00').is_junction()) - - def test_is_fifo_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_fifo()) - self.assertFalse((P / 'dirA').is_fifo()) - self.assertFalse((P / 'non-existing').is_fifo()) - self.assertFalse((P / 'fileA' / 'bah').is_fifo()) - self.assertIs((P / 'fileA\udfff').is_fifo(), False) - self.assertIs((P / 'fileA\x00').is_fifo(), False) - - def test_is_socket_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_socket()) - self.assertFalse((P / 'dirA').is_socket()) - self.assertFalse((P / 'non-existing').is_socket()) - self.assertFalse((P / 'fileA' / 'bah').is_socket()) - self.assertIs((P / 'fileA\udfff').is_socket(), False) - self.assertIs((P / 'fileA\x00').is_socket(), False) - - def test_is_block_device_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_block_device()) - self.assertFalse((P / 'dirA').is_block_device()) - self.assertFalse((P / 'non-existing').is_block_device()) - self.assertFalse((P / 'fileA' / 'bah').is_block_device()) - self.assertIs((P / 'fileA\udfff').is_block_device(), False) - self.assertIs((P / 'fileA\x00').is_block_device(), False) - - def test_is_char_device_false(self): - P = self.cls(self.base) - self.assertFalse((P / 'fileA').is_char_device()) - self.assertFalse((P / 'dirA').is_char_device()) - self.assertFalse((P / 'non-existing').is_char_device()) - self.assertFalse((P / 'fileA' / 'bah').is_char_device()) - self.assertIs((P / 'fileA\udfff').is_char_device(), False) - self.assertIs((P / 'fileA\x00').is_char_device(), False) - def test_delete_file(self): p = self.cls(self.base) / 'fileA' p._delete() From db9bea0386c1c0b6c9d7c66474cda7e47e4b56f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Srinivas=20Reddy=20Thatiparthy=20=28=E0=B0=A4=E0=B0=BE?= =?UTF-8?q?=E0=B0=9F=E0=B0=BF=E0=B0=AA=E0=B0=B0=E0=B1=8D=E0=B0=A4=E0=B0=BF?= =?UTF-8?q?=20=E0=B0=B6=E0=B1=8D=E0=B0=B0=E0=B1=80=E0=B0=A8=E0=B0=BF?= =?UTF-8?q?=E0=B0=B5=E0=B0=BE=E0=B0=B8=E0=B1=8D=20=20=E0=B0=B0=E0=B1=86?= =?UTF-8?q?=E0=B0=A1=E0=B1=8D=E0=B0=A1=E0=B0=BF=29?= Date: Wed, 11 Dec 2024 13:05:17 +0530 Subject: [PATCH 47/55] gh-127740: For odd-length input to bytes.fromhex(...) change the error message to ValueError: fromhex() arg must be of even length (#127756) --- Lib/test/test_bytes.py | 6 ++++++ ...4-12-10-21-08-05.gh-issue-127740.0tWC9h.rst | 3 +++ Objects/bytesobject.c | 18 ++++++++++++++---- 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 9e1985bb3a7639..32cd178fa3b445 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -459,6 +459,12 @@ def test_fromhex(self): self.assertRaises(ValueError, self.type2test.fromhex, '\x00') self.assertRaises(ValueError, self.type2test.fromhex, '12 \x00 34') + # For odd number of character(s) + for value in ("a", "aaa", "deadbee"): + with self.assertRaises(ValueError) as cm: + self.type2test.fromhex(value) + self.assertIn("fromhex() arg must contain an even number of hexadecimal digits", str(cm.exception)) + for data, pos in ( # invalid first hexadecimal character ('12 x4 56', 3), diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst new file mode 100644 index 00000000000000..f614dbb59bdc87 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-10-21-08-05.gh-issue-127740.0tWC9h.rst @@ -0,0 +1,3 @@ +Fix error message in :func:`bytes.fromhex` when given an odd number of +digits to properly indicate that an even number of hexadecimal digits is +required. diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 8c7651f0f3aa45..533089d25cd73a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2543,7 +2543,12 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) bot = _PyLong_DigitValue[*str]; if (bot >= 16) { - invalid_char = str - PyUnicode_1BYTE_DATA(string); + /* Check if we had a second digit */ + if (str >= end){ + invalid_char = -1; + } else { + invalid_char = str - PyUnicode_1BYTE_DATA(string); + } goto error; } str++; @@ -2554,9 +2559,14 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) return _PyBytesWriter_Finish(&writer, buf); error: - PyErr_Format(PyExc_ValueError, - "non-hexadecimal number found in " - "fromhex() arg at position %zd", invalid_char); + if (invalid_char == -1) { + PyErr_SetString(PyExc_ValueError, + "fromhex() arg must contain an even number of hexadecimal digits"); + } else { + PyErr_Format(PyExc_ValueError, + "non-hexadecimal number found in " + "fromhex() arg at position %zd", invalid_char); + } _PyBytesWriter_Dealloc(&writer); return NULL; } From 2cdeb61b57e638ae46a04386330a12abe9cddf2c Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 11 Dec 2024 04:27:07 -0500 Subject: [PATCH 48/55] Add `extern "C"` around `PyTraceMalloc_` functions. (#127772) Pretty much everything else exported by Python.h has an extern "C" annotation, yet this header appears to be missing one. --- Include/cpython/tracemalloc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Include/cpython/tracemalloc.h b/Include/cpython/tracemalloc.h index 61a16ea9a9f3eb..6d094291ae2e90 100644 --- a/Include/cpython/tracemalloc.h +++ b/Include/cpython/tracemalloc.h @@ -1,6 +1,9 @@ #ifndef Py_LIMITED_API #ifndef Py_TRACEMALLOC_H #define Py_TRACEMALLOC_H +#ifdef __cplusplus +extern "C" { +#endif /* Track an allocated memory block in the tracemalloc module. Return 0 on success, return -1 on error (failed to allocate memory to store @@ -22,5 +25,8 @@ PyAPI_FUNC(int) PyTraceMalloc_Untrack( unsigned int domain, uintptr_t ptr); +#ifdef __cplusplus +} +#endif #endif // !Py_TRACEMALLOC_H #endif // !Py_LIMITED_API From d5d84c3f13fe7fe591b375c41979d362bc11957a Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 11 Dec 2024 06:14:04 -0500 Subject: [PATCH 49/55] gh-127791: Fix, document, and test `PyUnstable_AtExit` (#127793) --- Doc/c-api/init.rst | 9 ++++ Doc/c-api/sys.rst | 4 ++ Include/internal/pycore_atexit.h | 1 - ...-12-10-14-25-22.gh-issue-127791.YRw4GU.rst | 2 + Modules/_testcapimodule.c | 48 +++++++++++++++++++ Modules/_testinternalcapi.c | 34 ------------- Modules/atexitmodule.c | 12 +++-- 7 files changed, 71 insertions(+), 39 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index ba1c2852f0bd53..dd63dd013e32dc 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -567,6 +567,15 @@ Initializing and finalizing the interpreter customized Python that always runs in isolated mode using :c:func:`Py_RunMain`. +.. c:function:: int PyUnstable_AtExit(PyInterpreterState *interp, void (*func)(void *), void *data) + + Register an :mod:`atexit` callback for the target interpreter *interp*. + This is similar to :c:func:`Py_AtExit`, but takes an explicit interpreter and + data pointer for the callback. + + The :term:`GIL` must be held for *interp*. + + .. versionadded:: 3.13 Process-wide parameters ======================= diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index d6fca1a0b0a219..c688afdca8231d 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -426,3 +426,7 @@ Process Control function registered last is called first. Each cleanup function will be called at most once. Since Python's internal finalization will have completed before the cleanup function, no Python APIs should be called by *func*. + + .. seealso:: + + :c:func:`PyUnstable_AtExit` for passing a ``void *data`` argument. diff --git a/Include/internal/pycore_atexit.h b/Include/internal/pycore_atexit.h index 507a5c03cbc792..cde5b530baf00c 100644 --- a/Include/internal/pycore_atexit.h +++ b/Include/internal/pycore_atexit.h @@ -44,7 +44,6 @@ typedef struct { struct atexit_state { atexit_callback *ll_callbacks; - atexit_callback *last_ll_callback; // XXX The rest of the state could be moved to the atexit module state // and a low-level callback added for it during module exec. diff --git a/Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst b/Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst new file mode 100644 index 00000000000000..70751f18f5cf17 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-10-14-25-22.gh-issue-127791.YRw4GU.rst @@ -0,0 +1,2 @@ +Fix loss of callbacks after more than one call to +:c:func:`PyUnstable_AtExit`. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 26f68691e44f83..8d86b535effb9a 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3353,6 +3353,53 @@ type_freeze(PyObject *module, PyObject *args) Py_RETURN_NONE; } +struct atexit_data { + int called; + PyThreadState *tstate; + PyInterpreterState *interp; +}; + +static void +atexit_callback(void *data) +{ + struct atexit_data *at_data = (struct atexit_data *)data; + // Ensure that the callback is from the same interpreter + assert(PyThreadState_Get() == at_data->tstate); + assert(PyInterpreterState_Get() == at_data->interp); + ++at_data->called; +} + +static PyObject * +test_atexit(PyObject *self, PyObject *Py_UNUSED(args)) +{ + PyThreadState *oldts = PyThreadState_Swap(NULL); + PyThreadState *tstate = Py_NewInterpreter(); + + struct atexit_data data = {0}; + data.tstate = PyThreadState_Get(); + data.interp = PyInterpreterState_Get(); + + int amount = 10; + for (int i = 0; i < amount; ++i) + { + int res = PyUnstable_AtExit(tstate->interp, atexit_callback, (void *)&data); + if (res < 0) { + Py_EndInterpreter(tstate); + PyThreadState_Swap(oldts); + PyErr_SetString(PyExc_RuntimeError, "atexit callback failed"); + return NULL; + } + } + + Py_EndInterpreter(tstate); + PyThreadState_Swap(oldts); + + if (data.called != amount) { + PyErr_SetString(PyExc_RuntimeError, "atexit callback not called"); + return NULL; + } + Py_RETURN_NONE; +} static PyMethodDef TestMethods[] = { {"set_errno", set_errno, METH_VARARGS}, @@ -3495,6 +3542,7 @@ static PyMethodDef TestMethods[] = { {"test_critical_sections", test_critical_sections, METH_NOARGS}, {"finalize_thread_hang", finalize_thread_hang, METH_O, NULL}, {"type_freeze", type_freeze, METH_VARARGS}, + {"test_atexit", test_atexit, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 1bb71a3e80b39d..288daf09a5fe5c 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1236,39 +1236,6 @@ unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); } - -struct atexit_data { - int called; -}; - -static void -callback(void *data) -{ - ((struct atexit_data *)data)->called += 1; -} - -static PyObject * -test_atexit(PyObject *self, PyObject *Py_UNUSED(args)) -{ - PyThreadState *oldts = PyThreadState_Swap(NULL); - PyThreadState *tstate = Py_NewInterpreter(); - - struct atexit_data data = {0}; - int res = PyUnstable_AtExit(tstate->interp, callback, (void *)&data); - Py_EndInterpreter(tstate); - PyThreadState_Swap(oldts); - if (res < 0) { - return NULL; - } - - if (data.called == 0) { - PyErr_SetString(PyExc_RuntimeError, "atexit callback not called"); - return NULL; - } - Py_RETURN_NONE; -} - - static PyObject * test_pyobject_is_freed(const char *test_name, PyObject *op) { @@ -2128,7 +2095,6 @@ static PyMethodDef module_functions[] = { {"_PyTraceMalloc_GetTraceback", tracemalloc_get_traceback, METH_VARARGS}, {"test_tstate_capi", test_tstate_capi, METH_NOARGS, NULL}, {"_PyUnicode_TransformDecimalAndSpaceToASCII", unicode_transformdecimalandspacetoascii, METH_O}, - {"test_atexit", test_atexit, METH_NOARGS}, {"check_pyobject_forbidden_bytes_is_freed", check_pyobject_forbidden_bytes_is_freed, METH_NOARGS}, {"check_pyobject_freed_is_freed", check_pyobject_freed_is_freed, METH_NOARGS}, diff --git a/Modules/atexitmodule.c b/Modules/atexitmodule.c index 297a8d74ba3bf4..c009235b7a36c2 100644 --- a/Modules/atexitmodule.c +++ b/Modules/atexitmodule.c @@ -27,7 +27,10 @@ int PyUnstable_AtExit(PyInterpreterState *interp, atexit_datacallbackfunc func, void *data) { - assert(interp == _PyInterpreterState_GET()); + PyThreadState *tstate = _PyThreadState_GET(); + _Py_EnsureTstateNotNULL(tstate); + assert(tstate->interp == interp); + atexit_callback *callback = PyMem_Malloc(sizeof(atexit_callback)); if (callback == NULL) { PyErr_NoMemory(); @@ -38,12 +41,13 @@ PyUnstable_AtExit(PyInterpreterState *interp, callback->next = NULL; struct atexit_state *state = &interp->atexit; - if (state->ll_callbacks == NULL) { + atexit_callback *top = state->ll_callbacks; + if (top == NULL) { state->ll_callbacks = callback; - state->last_ll_callback = callback; } else { - state->last_ll_callback->next = callback; + callback->next = top; + state->ll_callbacks = callback; } return 0; } From ce76b547f94de6b1c9c74657b4e8f150365ad76f Mon Sep 17 00:00:00 2001 From: Justin Applegate <70449145+Legoclones@users.noreply.github.com> Date: Wed, 11 Dec 2024 05:37:59 -0700 Subject: [PATCH 50/55] gh-126992: Change pickle code to base 10 for load_long and load_int (GH-127042) Co-authored-by: Serhiy Storchaka --- Lib/pickle.py | 4 +- Lib/test/pickletester.py | 20 ++++++++++ Lib/test/test_pickletools.py | 37 +++++++++++++++++++ ...-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst | 1 + Modules/_pickle.c | 11 ++---- 5 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst diff --git a/Lib/pickle.py b/Lib/pickle.py index 25dadb3f75a573..1920973e3f83e9 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1387,7 +1387,7 @@ def load_int(self): elif data == TRUE[1:]: val = True else: - val = int(data, 0) + val = int(data) self.append(val) dispatch[INT[0]] = load_int @@ -1407,7 +1407,7 @@ def load_long(self): val = self.readline()[:-1] if val and val[-1] == b'L'[0]: val = val[:-1] - self.append(int(val, 0)) + self.append(int(val)) dispatch[LONG[0]] = load_long def load_long1(self): diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index cf020a48b81cfa..bdc7ef62943a28 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -1012,6 +1012,26 @@ def test_constants(self): self.assertIs(self.loads(b'I01\n.'), True) self.assertIs(self.loads(b'I00\n.'), False) + def test_zero_padded_integers(self): + self.assertEqual(self.loads(b'I010\n.'), 10) + self.assertEqual(self.loads(b'I-010\n.'), -10) + self.assertEqual(self.loads(b'I0010\n.'), 10) + self.assertEqual(self.loads(b'I-0010\n.'), -10) + self.assertEqual(self.loads(b'L010\n.'), 10) + self.assertEqual(self.loads(b'L-010\n.'), -10) + self.assertEqual(self.loads(b'L0010\n.'), 10) + self.assertEqual(self.loads(b'L-0010\n.'), -10) + self.assertEqual(self.loads(b'L010L\n.'), 10) + self.assertEqual(self.loads(b'L-010L\n.'), -10) + + def test_nondecimal_integers(self): + self.assertRaises(ValueError, self.loads, b'I0b10\n.') + self.assertRaises(ValueError, self.loads, b'I0o10\n.') + self.assertRaises(ValueError, self.loads, b'I0x10\n.') + self.assertRaises(ValueError, self.loads, b'L0b10L\n.') + self.assertRaises(ValueError, self.loads, b'L0o10L\n.') + self.assertRaises(ValueError, self.loads, b'L0x10L\n.') + def test_empty_bytestring(self): # issue 11286 empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r') diff --git a/Lib/test/test_pickletools.py b/Lib/test/test_pickletools.py index 265dc497ccb86c..a178d3353eecdf 100644 --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -443,6 +443,43 @@ def test_persid(self): highest protocol among opcodes = 0 ''') + def test_constants(self): + self.check_dis(b"(NI00\nI01\n\x89\x88t.", '''\ + 0: ( MARK + 1: N NONE + 2: I INT False + 6: I INT True + 10: \\x89 NEWFALSE + 11: \\x88 NEWTRUE + 12: t TUPLE (MARK at 0) + 13: . STOP +highest protocol among opcodes = 2 +''') + + def test_integers(self): + self.check_dis(b"(I0\nI1\nI10\nI011\nL12\nL13L\nL014\nL015L\nt.", '''\ + 0: ( MARK + 1: I INT 0 + 4: I INT 1 + 7: I INT 10 + 11: I INT 11 + 16: L LONG 12 + 20: L LONG 13 + 25: L LONG 14 + 30: L LONG 15 + 36: t TUPLE (MARK at 0) + 37: . STOP +highest protocol among opcodes = 0 +''') + + def test_nondecimal_integers(self): + self.check_dis_error(b'I0b10\n.', '', 'invalid literal for int') + self.check_dis_error(b'I0o10\n.', '', 'invalid literal for int') + self.check_dis_error(b'I0x10\n.', '', 'invalid literal for int') + self.check_dis_error(b'L0b10L\n.', '', 'invalid literal for int') + self.check_dis_error(b'L0o10L\n.', '', 'invalid literal for int') + self.check_dis_error(b'L0x10L\n.', '', 'invalid literal for int') + class MiscTestCase(unittest.TestCase): def test__all__(self): diff --git a/Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst b/Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst new file mode 100644 index 00000000000000..526785f68cc807 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-20-21-20-56.gh-issue-126992.RbU0FZ.rst @@ -0,0 +1 @@ +Fix LONG and INT opcodes to only use base 10 for string to integer conversion in :mod:`pickle`. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 2696f38046121f..599b5f92c2a1f7 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -5211,16 +5211,14 @@ load_int(PickleState *state, UnpicklerObject *self) return bad_readline(state); errno = 0; - /* XXX: Should the base argument of strtol() be explicitly set to 10? - XXX(avassalotti): Should this uses PyOS_strtol()? */ - x = strtol(s, &endptr, 0); + /* XXX(avassalotti): Should this uses PyOS_strtol()? */ + x = strtol(s, &endptr, 10); if (errno || (*endptr != '\n' && *endptr != '\0')) { /* Hm, maybe we've got something long. Let's try reading * it as a Python int object. */ errno = 0; - /* XXX: Same thing about the base here. */ - value = PyLong_FromString(s, NULL, 0); + value = PyLong_FromString(s, NULL, 10); if (value == NULL) { PyErr_SetString(PyExc_ValueError, "could not convert string to int"); @@ -5370,8 +5368,7 @@ load_long(PickleState *state, UnpicklerObject *self) the 'L' to be present. */ if (s[len-2] == 'L') s[len-2] = '\0'; - /* XXX: Should the base argument explicitly set to 10? */ - value = PyLong_FromString(s, NULL, 0); + value = PyLong_FromString(s, NULL, 10); if (value == NULL) return -1; From b2ad7e0a2c1518539d9b0ef83c9f7a09d10fd303 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:57:51 +0200 Subject: [PATCH 51/55] CI: Use bash to properly expand variable (#127822) --- .github/workflows/reusable-windows-msi.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index d0d53dba0b45d1..a1c45d954247fb 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -24,4 +24,5 @@ jobs: with: persist-credentials: false - name: Build CPython installer - run: .\Tools\msi\build.bat --doc -"${ARCH}" + run: ./Tools/msi/build.bat --doc -"${ARCH}" + shell: bash From 359389ed51aecc107681e600b71852c0a97304e1 Mon Sep 17 00:00:00 2001 From: Nano Date: Wed, 11 Dec 2024 21:28:19 +0800 Subject: [PATCH 52/55] gh-123401: Fix http.cookies module to support obsolete RFC 850 date format (#123405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Wulian <1055917385@qq.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Victor Stinner --- Lib/http/cookies.py | 6 ++- Lib/test/test_http_cookies.py | 46 +++++++++++++++++++ ...-08-27-18-58-01.gh-issue-123401.t4-FpI.rst | 3 ++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index d7e8d08b2d92c1..23d5461f86fc23 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -425,9 +425,11 @@ def OutputString(self, attrs=None): ( # Optional group: there may not be a value. \s*=\s* # Equal Sign (?P # Start of group 'val' - "(?:[^\\"]|\\.)*" # Any doublequoted string + "(?:[^\\"]|\\.)*" # Any double-quoted string | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + # Special case for "expires" attr + (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day + [\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Date and time in specific format | # or [""" + _LegalValueChars + r"""]* # Any word or empty string ) # End of group 'val' diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index 8879902a6e2f41..7b3dc0fdaedc3b 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -59,6 +59,52 @@ def test_basic(self): for k, v in sorted(case['dict'].items()): self.assertEqual(C[k].value, v) + def test_obsolete_rfc850_date_format(self): + # Test cases with different days and dates in obsolete RFC 850 format + test_cases = [ + # from RFC 850, change EST to GMT + # https://datatracker.ietf.org/doc/html/rfc850#section-2 + { + 'data': 'key=value; expires=Saturday, 01-Jan-83 00:00:00 GMT', + 'output': 'Saturday, 01-Jan-83 00:00:00 GMT' + }, + { + 'data': 'key=value; expires=Friday, 19-Nov-82 16:59:30 GMT', + 'output': 'Friday, 19-Nov-82 16:59:30 GMT' + }, + # from RFC 9110 + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.6.7-6 + { + 'data': 'key=value; expires=Sunday, 06-Nov-94 08:49:37 GMT', + 'output': 'Sunday, 06-Nov-94 08:49:37 GMT' + }, + # other test cases + { + 'data': 'key=value; expires=Wednesday, 09-Nov-94 08:49:37 GMT', + 'output': 'Wednesday, 09-Nov-94 08:49:37 GMT' + }, + { + 'data': 'key=value; expires=Friday, 11-Nov-94 08:49:37 GMT', + 'output': 'Friday, 11-Nov-94 08:49:37 GMT' + }, + { + 'data': 'key=value; expires=Monday, 14-Nov-94 08:49:37 GMT', + 'output': 'Monday, 14-Nov-94 08:49:37 GMT' + }, + ] + + for case in test_cases: + with self.subTest(data=case['data']): + C = cookies.SimpleCookie() + C.load(case['data']) + + # Extract the cookie name from the data string + cookie_name = case['data'].split('=')[0] + + # Check if the cookie is loaded correctly + self.assertIn(cookie_name, C) + self.assertEqual(C[cookie_name].get('expires'), case['output']) + def test_unquote(self): cases = [ (r'a="b=\""', 'b="'), diff --git a/Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst b/Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst new file mode 100644 index 00000000000000..638f3f76239ca6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-27-18-58-01.gh-issue-123401.t4-FpI.rst @@ -0,0 +1,3 @@ +The :mod:`http.cookies` module now supports parsing obsolete :rfc:`850` +date formats, in accordance with :rfc:`9110` requirements. +Patch by Nano Zheng. From 5a23994a3dbee43a0b08f5920032f60f38b63071 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 11 Dec 2024 14:02:59 +0000 Subject: [PATCH 53/55] GH-127058: Make `PySequence_Tuple` safer and probably faster. (#127758) * Use a small buffer, then list when constructing a tuple from an arbitrary sequence. --- Include/internal/pycore_list.h | 2 +- Lib/test/test_capi/test_tuple.py | 25 ++++++ ...-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst | 3 + Objects/abstract.c | 87 +++++++++---------- Objects/listobject.c | 19 ++++ 5 files changed, 88 insertions(+), 48 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index f03e484f5ef8b0..836ff30abfcedb 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -62,7 +62,7 @@ typedef struct { union _PyStackRef; PyAPI_FUNC(PyObject *)_PyList_FromStackRefSteal(const union _PyStackRef *src, Py_ssize_t n); - +PyAPI_FUNC(PyObject *)_PyList_AsTupleAndClear(PyListObject *v); #ifdef __cplusplus } diff --git a/Lib/test/test_capi/test_tuple.py b/Lib/test/test_capi/test_tuple.py index e6b49caeb51f32..6349467c5d6b70 100644 --- a/Lib/test/test_capi/test_tuple.py +++ b/Lib/test/test_capi/test_tuple.py @@ -1,5 +1,6 @@ import unittest import sys +import gc from collections import namedtuple from test.support import import_helper @@ -257,5 +258,29 @@ def test__tuple_resize(self): self.assertRaises(SystemError, resize, [1, 2, 3], 0, False) self.assertRaises(SystemError, resize, NULL, 0, False) + def test_bug_59313(self): + # Before 3.14, the C-API function PySequence_Tuple + # would create incomplete tuples which were visible to + # the cycle GC, and this test would crash the interpeter. + TAG = object() + tuples = [] + + def referrer_tuples(): + return [x for x in gc.get_referrers(TAG) + if isinstance(x, tuple)] + + def my_iter(): + nonlocal tuples + yield TAG # 'tag' gets stored in the result tuple + tuples += referrer_tuples() + for x in range(10): + tuples += referrer_tuples() + # Prior to 3.13 would raise a SystemError when the tuple needs to be resized + yield x + + self.assertEqual(tuple(my_iter()), (TAG, *range(10))) + self.assertEqual(tuples, []) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst new file mode 100644 index 00000000000000..248e1b4855afb8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst @@ -0,0 +1,3 @@ +``PySequence_Tuple`` now creates the resulting tuple atomically, preventing +partially created tuples being visible to the garbage collector or through +``gc.get_referrers()`` diff --git a/Objects/abstract.c b/Objects/abstract.c index f6647874d732f6..c92ef10aa79648 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1993,9 +1993,6 @@ PyObject * PySequence_Tuple(PyObject *v) { PyObject *it; /* iter(v) */ - Py_ssize_t n; /* guess for result tuple size */ - PyObject *result = NULL; - Py_ssize_t j; if (v == NULL) { return null_error(); @@ -2017,58 +2014,54 @@ PySequence_Tuple(PyObject *v) if (it == NULL) return NULL; - /* Guess result size and allocate space. */ - n = PyObject_LengthHint(v, 10); - if (n == -1) - goto Fail; - result = PyTuple_New(n); - if (result == NULL) - goto Fail; - - /* Fill the tuple. */ - for (j = 0; ; ++j) { + Py_ssize_t n; + PyObject *buffer[8]; + for (n = 0; n < 8; n++) { PyObject *item = PyIter_Next(it); if (item == NULL) { - if (PyErr_Occurred()) - goto Fail; - break; - } - if (j >= n) { - size_t newn = (size_t)n; - /* The over-allocation strategy can grow a bit faster - than for lists because unlike lists the - over-allocation isn't permanent -- we reclaim - the excess before the end of this routine. - So, grow by ten and then add 25%. - */ - newn += 10u; - newn += newn >> 2; - if (newn > PY_SSIZE_T_MAX) { - /* Check for overflow */ - PyErr_NoMemory(); - Py_DECREF(item); - goto Fail; + if (PyErr_Occurred()) { + goto fail; } - n = (Py_ssize_t)newn; - if (_PyTuple_Resize(&result, n) != 0) { - Py_DECREF(item); - goto Fail; + Py_DECREF(it); + return _PyTuple_FromArraySteal(buffer, n); + } + buffer[n] = item; + } + PyListObject *list = (PyListObject *)PyList_New(16); + if (list == NULL) { + goto fail; + } + assert(n == 8); + Py_SET_SIZE(list, n); + for (Py_ssize_t j = 0; j < n; j++) { + PyList_SET_ITEM(list, j, buffer[j]); + } + for (;;) { + PyObject *item = PyIter_Next(it); + if (item == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(list); + Py_DECREF(it); + return NULL; } + break; + } + if (_PyList_AppendTakeRef(list, item) < 0) { + Py_DECREF(list); + Py_DECREF(it); + return NULL; } - PyTuple_SET_ITEM(result, j, item); } - - /* Cut tuple back if guess was too large. */ - if (j < n && - _PyTuple_Resize(&result, j) != 0) - goto Fail; - Py_DECREF(it); - return result; - -Fail: - Py_XDECREF(result); + PyObject *res = _PyList_AsTupleAndClear(list); + Py_DECREF(list); + return res; +fail: Py_DECREF(it); + while (n > 0) { + n--; + Py_DECREF(buffer[n]); + } return NULL; } diff --git a/Objects/listobject.c b/Objects/listobject.c index 3832295600a0ab..a877bad66be45f 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3174,6 +3174,25 @@ PyList_AsTuple(PyObject *v) return ret; } +PyObject * +_PyList_AsTupleAndClear(PyListObject *self) +{ + assert(self != NULL); + PyObject *ret; + if (self->ob_item == NULL) { + return PyTuple_New(0); + } + Py_BEGIN_CRITICAL_SECTION(self); + PyObject **items = self->ob_item; + Py_ssize_t size = Py_SIZE(self); + self->ob_item = NULL; + Py_SET_SIZE(self, 0); + ret = _PyTuple_FromArraySteal(items, size); + free_list_items(items, false); + Py_END_CRITICAL_SECTION(); + return ret; +} + PyObject * _PyList_FromStackRefSteal(const _PyStackRef *src, Py_ssize_t n) { From b0f278ff0551b06191cec01445c577e3b25570da Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 11 Dec 2024 16:06:07 +0100 Subject: [PATCH 54/55] gh-127065: Make methodcaller thread-safe and re-entrant (GH-127746) The function `operator.methodcaller` was not thread-safe since the additional of the vectorcall method in gh-89013. In the free threading build the issue is easy to trigger, for the normal build harder. This makes the `methodcaller` safe by: * Replacing the lazy initialization with initialization in the constructor. * Using a stack allocated space for the vectorcall arguments and falling back to `tp_call` for calls with more than 8 arguments. --- .../test_free_threading/test_methodcaller.py | 33 ++++ Lib/test/test_operator.py | 13 ++ ...-12-01-22-28-41.gh-issue-127065.tFpRer.rst | 1 + Modules/_operator.c | 180 ++++++++---------- 4 files changed, 131 insertions(+), 96 deletions(-) create mode 100644 Lib/test/test_free_threading/test_methodcaller.py create mode 100644 Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst diff --git a/Lib/test/test_free_threading/test_methodcaller.py b/Lib/test/test_free_threading/test_methodcaller.py new file mode 100644 index 00000000000000..8846b0010012f2 --- /dev/null +++ b/Lib/test/test_free_threading/test_methodcaller.py @@ -0,0 +1,33 @@ +import unittest +from threading import Thread +from test.support import threading_helper +from operator import methodcaller + + +class TestMethodcaller(unittest.TestCase): + def test_methodcaller_threading(self): + number_of_threads = 10 + size = 4_000 + + mc = methodcaller("append", 2) + + def work(mc, l, ii): + for _ in range(ii): + mc(l) + + worker_threads = [] + lists = [] + for ii in range(number_of_threads): + l = [] + lists.append(l) + worker_threads.append(Thread(target=work, args=[mc, l, size])) + for t in worker_threads: + t.start() + for t in worker_threads: + t.join() + for l in lists: + assert len(l) == size + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py index 812d46482e238a..82578a0ef1e6f2 100644 --- a/Lib/test/test_operator.py +++ b/Lib/test/test_operator.py @@ -482,6 +482,8 @@ def bar(self, f=42): return f def baz(*args, **kwds): return kwds['name'], kwds['self'] + def return_arguments(self, *args, **kwds): + return args, kwds a = A() f = operator.methodcaller('foo') self.assertRaises(IndexError, f, a) @@ -498,6 +500,17 @@ def baz(*args, **kwds): f = operator.methodcaller('baz', name='spam', self='eggs') self.assertEqual(f(a), ('spam', 'eggs')) + many_positional_arguments = tuple(range(10)) + many_kw_arguments = dict(zip('abcdefghij', range(10))) + f = operator.methodcaller('return_arguments', *many_positional_arguments) + self.assertEqual(f(a), (many_positional_arguments, {})) + + f = operator.methodcaller('return_arguments', **many_kw_arguments) + self.assertEqual(f(a), ((), many_kw_arguments)) + + f = operator.methodcaller('return_arguments', *many_positional_arguments, **many_kw_arguments) + self.assertEqual(f(a), (many_positional_arguments, many_kw_arguments)) + def test_inplace(self): operator = self.module class C(object): diff --git a/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst b/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst new file mode 100644 index 00000000000000..03d6953b9ddfa1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst @@ -0,0 +1 @@ +Make :func:`operator.methodcaller` thread-safe and re-entrant safe. diff --git a/Modules/_operator.c b/Modules/_operator.c index 6c1945174ab7cd..ce3ef015710223 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1595,78 +1595,75 @@ static PyType_Spec attrgetter_type_spec = { typedef struct { PyObject_HEAD PyObject *name; - PyObject *xargs; // reference to arguments passed in constructor + PyObject *args; PyObject *kwds; - PyObject **vectorcall_args; /* Borrowed references */ + PyObject *vectorcall_args; PyObject *vectorcall_kwnames; vectorcallfunc vectorcall; } methodcallerobject; -#ifndef Py_GIL_DISABLED -static int _methodcaller_initialize_vectorcall(methodcallerobject* mc) -{ - PyObject* args = mc->xargs; - PyObject* kwds = mc->kwds; - - Py_ssize_t nargs = PyTuple_GET_SIZE(args); - assert(nargs > 0); - mc->vectorcall_args = PyMem_Calloc( - nargs + (kwds ? PyDict_Size(kwds) : 0), - sizeof(PyObject*)); - if (!mc->vectorcall_args) { - PyErr_NoMemory(); - return -1; - } - /* The first item of vectorcall_args will be filled with obj later */ - if (nargs > 1) { - memcpy(mc->vectorcall_args, PySequence_Fast_ITEMS(args), - nargs * sizeof(PyObject*)); - } - if (kwds) { - const Py_ssize_t nkwds = PyDict_Size(kwds); - - mc->vectorcall_kwnames = PyTuple_New(nkwds); - if (!mc->vectorcall_kwnames) { - return -1; - } - Py_ssize_t i = 0, ppos = 0; - PyObject* key, * value; - while (PyDict_Next(kwds, &ppos, &key, &value)) { - PyTuple_SET_ITEM(mc->vectorcall_kwnames, i, Py_NewRef(key)); - mc->vectorcall_args[nargs + i] = value; // borrowed reference - ++i; - } - } - else { - mc->vectorcall_kwnames = NULL; - } - return 1; -} +#define _METHODCALLER_MAX_ARGS 8 static PyObject * -methodcaller_vectorcall( - methodcallerobject *mc, PyObject *const *args, size_t nargsf, PyObject* kwnames) +methodcaller_vectorcall(methodcallerobject *mc, PyObject *const *args, + size_t nargsf, PyObject* kwnames) { if (!_PyArg_CheckPositional("methodcaller", PyVectorcall_NARGS(nargsf), 1, 1) || !_PyArg_NoKwnames("methodcaller", kwnames)) { return NULL; } - if (mc->vectorcall_args == NULL) { - if (_methodcaller_initialize_vectorcall(mc) < 0) { - return NULL; - } - } + assert(mc->vectorcall_args != NULL); + + PyObject *tmp_args[_METHODCALLER_MAX_ARGS]; + tmp_args[0] = args[0]; + assert(1 + PyTuple_GET_SIZE(mc->vectorcall_args) <= _METHODCALLER_MAX_ARGS); + memcpy(tmp_args + 1, _PyTuple_ITEMS(mc->vectorcall_args), sizeof(PyObject *) * PyTuple_GET_SIZE(mc->vectorcall_args)); - assert(mc->vectorcall_args != 0); - mc->vectorcall_args[0] = args[0]; - return PyObject_VectorcallMethod( - mc->name, mc->vectorcall_args, - (PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET, + return PyObject_VectorcallMethod(mc->name, tmp_args, + (1 + PyTuple_GET_SIZE(mc->args)) | PY_VECTORCALL_ARGUMENTS_OFFSET, mc->vectorcall_kwnames); } -#endif +static int +_methodcaller_initialize_vectorcall(methodcallerobject* mc) +{ + PyObject* args = mc->args; + PyObject* kwds = mc->kwds; + + if (kwds && PyDict_Size(kwds)) { + PyObject *values = PyDict_Values(kwds); + if (!values) { + return -1; + } + PyObject *values_tuple = PySequence_Tuple(values); + Py_DECREF(values); + if (!values_tuple) { + return -1; + } + if (PyTuple_GET_SIZE(args)) { + mc->vectorcall_args = PySequence_Concat(args, values_tuple); + Py_DECREF(values_tuple); + if (mc->vectorcall_args == NULL) { + return -1; + } + } + else { + mc->vectorcall_args = values_tuple; + } + mc->vectorcall_kwnames = PySequence_Tuple(kwds); + if (!mc->vectorcall_kwnames) { + return -1; + } + } + else { + mc->vectorcall_args = Py_NewRef(args); + mc->vectorcall_kwnames = NULL; + } + + mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; + return 0; +} /* AC 3.5: variable number of arguments, not currently support by AC */ static PyObject * @@ -1694,25 +1691,30 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (mc == NULL) { return NULL; } + mc->vectorcall = NULL; + mc->vectorcall_args = NULL; + mc->vectorcall_kwnames = NULL; + mc->kwds = Py_XNewRef(kwds); Py_INCREF(name); PyInterpreterState *interp = _PyInterpreterState_GET(); _PyUnicode_InternMortal(interp, &name); mc->name = name; - mc->xargs = Py_XNewRef(args); // allows us to use borrowed references - mc->kwds = Py_XNewRef(kwds); - mc->vectorcall_args = 0; - + mc->args = PyTuple_GetSlice(args, 1, PyTuple_GET_SIZE(args)); + if (mc->args == NULL) { + Py_DECREF(mc); + return NULL; + } -#ifdef Py_GIL_DISABLED - // gh-127065: The current implementation of methodcaller_vectorcall - // is not thread-safe because it modifies the `vectorcall_args` array, - // which is shared across calls. - mc->vectorcall = NULL; -#else - mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; -#endif + Py_ssize_t vectorcall_size = PyTuple_GET_SIZE(args) + + (kwds ? PyDict_Size(kwds) : 0); + if (vectorcall_size < (_METHODCALLER_MAX_ARGS)) { + if (_methodcaller_initialize_vectorcall(mc) < 0) { + Py_DECREF(mc); + return NULL; + } + } PyObject_GC_Track(mc); return (PyObject *)mc; @@ -1722,13 +1724,10 @@ static void methodcaller_clear(methodcallerobject *mc) { Py_CLEAR(mc->name); - Py_CLEAR(mc->xargs); + Py_CLEAR(mc->args); Py_CLEAR(mc->kwds); - if (mc->vectorcall_args != NULL) { - PyMem_Free(mc->vectorcall_args); - mc->vectorcall_args = 0; - Py_CLEAR(mc->vectorcall_kwnames); - } + Py_CLEAR(mc->vectorcall_args); + Py_CLEAR(mc->vectorcall_kwnames); } static void @@ -1745,8 +1744,10 @@ static int methodcaller_traverse(methodcallerobject *mc, visitproc visit, void *arg) { Py_VISIT(mc->name); - Py_VISIT(mc->xargs); + Py_VISIT(mc->args); Py_VISIT(mc->kwds); + Py_VISIT(mc->vectorcall_args); + Py_VISIT(mc->vectorcall_kwnames); Py_VISIT(Py_TYPE(mc)); return 0; } @@ -1765,15 +1766,7 @@ methodcaller_call(methodcallerobject *mc, PyObject *args, PyObject *kw) if (method == NULL) return NULL; - - PyObject *cargs = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); - if (cargs == NULL) { - Py_DECREF(method); - return NULL; - } - - result = PyObject_Call(method, cargs, mc->kwds); - Py_DECREF(cargs); + result = PyObject_Call(method, mc->args, mc->kwds); Py_DECREF(method); return result; } @@ -1791,7 +1784,7 @@ methodcaller_repr(methodcallerobject *mc) } numkwdargs = mc->kwds != NULL ? PyDict_GET_SIZE(mc->kwds) : 0; - numposargs = PyTuple_GET_SIZE(mc->xargs) - 1; + numposargs = PyTuple_GET_SIZE(mc->args); numtotalargs = numposargs + numkwdargs; if (numtotalargs == 0) { @@ -1807,7 +1800,7 @@ methodcaller_repr(methodcallerobject *mc) } for (i = 0; i < numposargs; ++i) { - PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->xargs, i+1)); + PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->args, i)); if (onerepr == NULL) goto done; PyTuple_SET_ITEM(argreprs, i, onerepr); @@ -1859,14 +1852,14 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) { if (!mc->kwds || PyDict_GET_SIZE(mc->kwds) == 0) { Py_ssize_t i; - Py_ssize_t newarg_size = PyTuple_GET_SIZE(mc->xargs); - PyObject *newargs = PyTuple_New(newarg_size); + Py_ssize_t callargcount = PyTuple_GET_SIZE(mc->args); + PyObject *newargs = PyTuple_New(1 + callargcount); if (newargs == NULL) return NULL; PyTuple_SET_ITEM(newargs, 0, Py_NewRef(mc->name)); - for (i = 1; i < newarg_size; ++i) { - PyObject *arg = PyTuple_GET_ITEM(mc->xargs, i); - PyTuple_SET_ITEM(newargs, i, Py_NewRef(arg)); + for (i = 0; i < callargcount; ++i) { + PyObject *arg = PyTuple_GET_ITEM(mc->args, i); + PyTuple_SET_ITEM(newargs, i + 1, Py_NewRef(arg)); } return Py_BuildValue("ON", Py_TYPE(mc), newargs); } @@ -1884,12 +1877,7 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) constructor = PyObject_VectorcallDict(partial, newargs, 2, mc->kwds); Py_DECREF(partial); - PyObject *args = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); - if (!args) { - Py_DECREF(constructor); - return NULL; - } - return Py_BuildValue("NO", constructor, args); + return Py_BuildValue("NO", constructor, mc->args); } } From dd9da738ad1d420fabafaded3fe63912b2b17cfb Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 11 Dec 2024 11:28:44 -0500 Subject: [PATCH 55/55] gh-118915: C API: Document frame locals proxies. (#127720) Co-authored-by: Alex Waygood --- Doc/c-api/frame.rst | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/frame.rst b/Doc/c-api/frame.rst index 638a740e0c24da..1a52e146a69751 100644 --- a/Doc/c-api/frame.rst +++ b/Doc/c-api/frame.rst @@ -132,7 +132,7 @@ See also :ref:`Reflection `. .. versionadded:: 3.11 .. versionchanged:: 3.13 - As part of :pep:`667`, return a proxy object for optimized scopes. + As part of :pep:`667`, return an instance of :c:var:`PyFrameLocalsProxy_Type`. .. c:function:: int PyFrame_GetLineNumber(PyFrameObject *frame) @@ -140,6 +140,26 @@ See also :ref:`Reflection `. Return the line number that *frame* is currently executing. +Frame Locals Proxies +^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 3.13 + +The :attr:`~frame.f_locals` attribute on a :ref:`frame object ` +is an instance of a "frame-locals proxy". The proxy object exposes a +write-through view of the underlying locals dictionary for the frame. This +ensures that the variables exposed by ``f_locals`` are always up to date with +the live local variables in the frame itself. + +See :pep:`667` for more information. + +.. c:var:: PyTypeObject PyFrameLocalsProxy_Type + + The type of frame :func:`locals` proxy objects. + +.. c:function:: int PyFrameLocalsProxy_Check(PyObject *obj) + + Return non-zero if *obj* is a frame :func:`locals` proxy. Internal Frames ^^^^^^^^^^^^^^^