From 00d913c6718aa365027c6dcf850e8f40731e54fc Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 6 May 2024 13:06:09 -0700 Subject: [PATCH 01/15] gh-118415: Fix issues with local tracing being enabled/disabled on a function (#118496) --- .../test_free_threading/test_monitoring.py | 39 +++++--- Python/instrumentation.c | 91 +++++++++---------- 2 files changed, 71 insertions(+), 59 deletions(-) diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index e170840ca3cb27..fa588046bf9923 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -8,20 +8,14 @@ from sys import monitoring from test.support import is_wasi -from threading import Thread +from threading import Thread, _PyRLock from unittest import TestCase class InstrumentationMultiThreadedMixin: - if not hasattr(sys, "gettotalrefcount"): - thread_count = 50 - func_count = 1000 - fib = 15 - else: - # Run a little faster in debug builds... - thread_count = 25 - func_count = 500 - fib = 15 + thread_count = 10 + func_count = 200 + fib = 12 def after_threads(self): """Runs once after all the threads have started""" @@ -175,9 +169,6 @@ def during_threads(self): @unittest.skipIf(is_wasi, "WASI has no threads.") class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): """Uses sys.setprofile and repeatedly toggles instrumentation on and off""" - thread_count = 25 - func_count = 200 - fib = 15 def setUp(self): self.set = False @@ -227,6 +218,28 @@ def test_register_callback(self): for ref in self.refs: self.assertEqual(ref(), None) + def test_set_local_trace_opcodes(self): + def trace(frame, event, arg): + frame.f_trace_opcodes = True + return trace + + sys.settrace(trace) + try: + l = _PyRLock() + + def f(): + for i in range(3000): + with l: + pass + + t = Thread(target=f) + t.start() + for i in range(3000): + with l: + pass + t.join() + finally: + sys.settrace(None) if __name__ == "__main__": unittest.main() diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 72c9d2af5b3202..77d3489afcfc72 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -459,7 +459,6 @@ dump_instrumentation_data(PyCodeObject *code, int star, FILE*out) } #define CHECK(test) do { \ - ASSERT_WORLD_STOPPED_OR_LOCKED(code); \ if (!(test)) { \ dump_instrumentation_data(code, i, stderr); \ } \ @@ -516,10 +515,6 @@ sanity_check_instrumentation(PyCodeObject *code) if (!is_instrumented(opcode)) { CHECK(_PyOpcode_Deopt[opcode] == opcode); } - if (data->per_instruction_tools) { - uint8_t tools = active_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION]; - CHECK((tools & data->per_instruction_tools[i]) == data->per_instruction_tools[i]); - } } if (opcode == INSTRUMENTED_LINE) { CHECK(data->lines); @@ -677,8 +672,6 @@ de_instrument_per_instruction(PyCodeObject *code, int i) } assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION); assert(instr->op.code != INSTRUMENTED_INSTRUCTION); - /* Keep things clean for sanity check */ - code->_co_monitoring->per_instruction_opcodes[i] = 0; } @@ -992,6 +985,7 @@ set_global_version(PyThreadState *tstate, uint32_t version) static bool is_version_up_to_date(PyCodeObject *code, PyInterpreterState *interp) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); return global_version(interp) == code->_co_instrumentation_version; } @@ -999,11 +993,24 @@ is_version_up_to_date(PyCodeObject *code, PyInterpreterState *interp) static bool instrumentation_cross_checks(PyInterpreterState *interp, PyCodeObject *code) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); _Py_LocalMonitors expected = local_union( interp->monitors, code->_co_monitoring->local_monitors); return monitors_equals(code->_co_monitoring->active_monitors, expected); } + +static int +debug_check_sanity(PyInterpreterState *interp, PyCodeObject *code) +{ + int res; + LOCK_CODE(code); + res = is_version_up_to_date(code, interp) && + instrumentation_cross_checks(interp, code); + UNLOCK_CODE(); + return res; +} + #endif static inline uint8_t @@ -1018,8 +1025,7 @@ get_tools_for_instruction(PyCodeObject *code, PyInterpreterState *interp, int i, event = PY_MONITORING_EVENT_CALL; } if (PY_MONITORING_IS_INSTRUMENTED_EVENT(event)) { - CHECK(is_version_up_to_date(code, interp)); - CHECK(instrumentation_cross_checks(interp, code)); + CHECK(debug_check_sanity(interp, code)); if (code->_co_monitoring->tools) { tools = code->_co_monitoring->tools[i]; } @@ -1218,8 +1224,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, { PyCodeObject *code = _PyFrame_GetCode(frame); assert(tstate->tracing == 0); - assert(is_version_up_to_date(code, tstate->interp)); - assert(instrumentation_cross_checks(tstate->interp, code)); + assert(debug_check_sanity(tstate->interp, code)); int i = (int)(instr - _PyCode_CODE(code)); _PyCoMonitoringData *monitoring = code->_co_monitoring; @@ -1339,8 +1344,7 @@ int _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr) { PyCodeObject *code = _PyFrame_GetCode(frame); - assert(is_version_up_to_date(code, tstate->interp)); - assert(instrumentation_cross_checks(tstate->interp, code)); + assert(debug_check_sanity(tstate->interp, code)); int offset = (int)(instr - _PyCode_CODE(code)); _PyCoMonitoringData *instrumentation_data = code->_co_monitoring; assert(instrumentation_data->per_instruction_opcodes); @@ -1671,9 +1675,11 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) PyErr_NoMemory(); return -1; } - /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ + // Initialize all of the instructions so if local events change while another thread is executing + // we know what the original opcode was. for (int i = 0; i < code_len; i++) { - code->_co_monitoring->per_instruction_opcodes[i] = 0; + int opcode = _PyCode_CODE(code)[i].op.code; + code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; } } if (multitools && code->_co_monitoring->per_instruction_tools == NULL) { @@ -1682,7 +1688,6 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) PyErr_NoMemory(); return -1; } - /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { code->_co_monitoring->per_instruction_tools[i] = 0; } @@ -1692,17 +1697,10 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) } static int -instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) +force_instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); - if (is_version_up_to_date(code, interp)) { - assert( - interp->ceval.instrumentation_version == 0 || - instrumentation_cross_checks(interp, code) - ); - return 0; - } #ifdef _Py_TIER2 if (code->co_executors != NULL) { _PyCode_Clear_Executors(code); @@ -1769,7 +1767,6 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) // GH-103845: We need to remove both the line and instruction instrumentation before // adding new ones, otherwise we may remove the newly added instrumentation. - uint8_t removed_line_tools = removed_events.tools[PY_MONITORING_EVENT_LINE]; uint8_t removed_per_instruction_tools = removed_events.tools[PY_MONITORING_EVENT_INSTRUCTION]; @@ -1777,9 +1774,7 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; for (int i = code->_co_firsttraceable; i < code_len;) { if (line_data[i].original_opcode) { - if (removed_line_tools) { - remove_line_tools(code, i, removed_line_tools); - } + remove_line_tools(code, i, removed_line_tools); } i += _PyInstruction_GetLength(code, i); } @@ -1791,15 +1786,14 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) i += _PyInstruction_GetLength(code, i); continue; } - if (removed_per_instruction_tools) { - remove_per_instruction_tools(code, i, removed_per_instruction_tools); - } + remove_per_instruction_tools(code, i, removed_per_instruction_tools); i += _PyInstruction_GetLength(code, i); } } #ifdef INSTRUMENT_DEBUG sanity_check_instrumentation(code); #endif + uint8_t new_line_tools = new_events.tools[PY_MONITORING_EVENT_LINE]; uint8_t new_per_instruction_tools = new_events.tools[PY_MONITORING_EVENT_INSTRUCTION]; @@ -1807,9 +1801,7 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; for (int i = code->_co_firsttraceable; i < code_len;) { if (line_data[i].original_opcode) { - if (new_line_tools) { - add_line_tools(code, i, new_line_tools); - } + add_line_tools(code, i, new_line_tools); } i += _PyInstruction_GetLength(code, i); } @@ -1821,12 +1813,11 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) i += _PyInstruction_GetLength(code, i); continue; } - if (new_per_instruction_tools) { - add_per_instruction_tools(code, i, new_per_instruction_tools); - } + add_per_instruction_tools(code, i, new_per_instruction_tools); i += _PyInstruction_GetLength(code, i); } } + done: FT_ATOMIC_STORE_UINTPTR_RELEASE(code->_co_instrumentation_version, global_version(interp)); @@ -1837,6 +1828,22 @@ instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) return 0; } +static int +instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) +{ + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + + if (is_version_up_to_date(code, interp)) { + assert( + interp->ceval.instrumentation_version == 0 || + instrumentation_cross_checks(interp, code) + ); + return 0; + } + + return force_instrument_lock_held(code, interp); +} + int _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) { @@ -1983,16 +1990,8 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent goto done; } set_local_events(local, tool_id, events); - if (is_version_up_to_date(code, interp)) { - /* Force instrumentation update */ - code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT; - } - -#ifdef _Py_TIER2 - _Py_Executors_InvalidateDependency(interp, code, 1); -#endif - res = instrument_lock_held(code, interp); + res = force_instrument_lock_held(code, interp); done: UNLOCK_CODE(); From 616b745b89a52a1d27123107718f85e65918afdc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 6 May 2024 22:21:06 +0100 Subject: [PATCH 02/15] GH-115709: Invalidate executors when a local variable is changed via frame.f_locals (#118639) Also fix unrelated assert in debug Tier2/JIT builds. --- Include/cpython/optimizer.h | 12 +++++++++--- Lib/test/test_capi/test_opt.py | 13 +++++++++++++ Objects/frameobject.c | 3 ++- Python/bytecodes.c | 3 +++ Python/generated_cases.c.h | 3 +++ 5 files changed, 30 insertions(+), 4 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 744a272251e75c..5f218d75b346a0 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -141,9 +141,6 @@ void _Py_ExecutorDetach(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); -PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); -PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); - /* For testing */ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); @@ -151,6 +148,15 @@ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 +#ifdef _Py_TIER2 +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); +PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +#else +# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) +# define _Py_Executors_InvalidateAll(A, B) ((void)0) +#endif + + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 6e5b626e93291a..0491ff9b84d486 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1321,5 +1321,18 @@ def testfunc(n): self.assertIsNotNone(ex) self.assertIn("_FOR_ITER_GEN_FRAME", get_opnames(ex)) + def test_modified_local_is_seen_by_optimized_code(self): + l = sys._getframe().f_locals + a = 1 + s = 0 + for j in range(1 << 10): + a + a + l["xa"[j >> 9]] = 1.0 + s += a + self.assertIs(type(a), float) + self.assertIs(type(s), float) + self.assertEqual(s, 1024.0) + + if __name__ == "__main__": unittest.main() diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 869cdec8aa7062..26a04cbeea90bf 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -148,8 +148,9 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) if (PyUnicode_CheckExact(key)) { int i = framelocalsproxy_getkeyindex(frame, key, false); if (i >= 0) { - _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); + _Py_Executors_InvalidateDependency(PyInterpreterState_Get(), co, 1); + _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i); PyObject *oldvalue = fast[i]; PyObject *cell = NULL; if (kind == CO_FAST_FREE) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b2a0dc030e20cc..b2ddec98e682f2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2424,6 +2424,9 @@ dummy_func( opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; next_instr = this_instr; + if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); + } DISPATCH_GOTO(); } tstate->previous_executor = Py_None; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 87098b0506522f..d3126b0c980723 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2673,6 +2673,9 @@ opcode = executor->vm_data.opcode; oparg = (oparg & ~255) | executor->vm_data.oparg; next_instr = this_instr; + if (_PyOpcode_Caches[_PyOpcode_Deopt[opcode]]) { + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); + } DISPATCH_GOTO(); } tstate->previous_executor = Py_None; From 2ba2c142a615abbd8138d253edfe02426c386961 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 6 May 2024 17:24:14 -0400 Subject: [PATCH 03/15] gh-118527: Intern code name and filename on default build (#118576) Interned and non-interned strings are treated differently by `marshal`, so be consistent between the default and free-threaded build. --- Objects/codeobject.c | 2 -- Programs/test_frozenmain.h | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 15d3960fc2df51..2ce5f7dca2eb5c 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -390,11 +390,9 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) co->co_filename = Py_NewRef(con->filename); co->co_name = Py_NewRef(con->name); co->co_qualname = Py_NewRef(con->qualname); -#ifdef Py_GIL_DISABLED PyUnicode_InternInPlace(&co->co_filename); PyUnicode_InternInPlace(&co->co_name); PyUnicode_InternInPlace(&co->co_qualname); -#endif co->co_flags = con->flags; co->co_firstlineno = con->firstlineno; diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 657e9345cf5ab7..3e7ab7bffb158e 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -25,8 +25,8 @@ unsigned char M_test_frozenmain[] = { 108,99,97,112,105,218,5,112,114,105,110,116,218,4,97,114, 103,118,218,11,103,101,116,95,99,111,110,102,105,103,115,114, 3,0,0,0,218,3,107,101,121,169,0,243,0,0,0,0, - 250,18,116,101,115,116,95,102,114,111,122,101,110,109,97,105, - 110,46,112,121,250,8,60,109,111,100,117,108,101,62,114,18, + 218,18,116,101,115,116,95,102,114,111,122,101,110,109,97,105, + 110,46,112,121,218,8,60,109,111,100,117,108,101,62,114,18, 0,0,0,1,0,0,0,115,99,0,0,0,240,3,1,1, 1,243,8,0,1,11,219,0,24,225,0,5,208,6,26,212, 0,27,217,0,5,128,106,144,35,151,40,145,40,212,0,27, From 9fd33af5ac57f649971ffd8091bd898a96f85b55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 7 May 2024 00:08:17 +0200 Subject: [PATCH 04/15] Test premium Mac builders (#118672) --- .github/workflows/build.yml | 2 +- .github/workflows/reusable-macos.yml | 2 +- Lib/test/test_pyrepl.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index aac395d5638750..7d9e70a68005e3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -208,7 +208,7 @@ jobs: with: config_hash: ${{ needs.check_source.outputs.config_hash }} # macos-14 is M1, macos-13 is Intel - os-matrix: '["macos-14", "macos-13"]' + os-matrix: '["macos-14-xlarge", "macos-13-large"]' build_macos_free_threading: name: 'macOS (free-threading)' diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index e3319f141bd7f5..d06a718d199c96 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -50,7 +50,7 @@ jobs: --prefix=/opt/python-dev \ --with-openssl="$(brew --prefix openssl@3.0)" - name: Build CPython - run: make -j4 + run: make -j8 - name: Display build info run: make pythoninfo - name: Tests diff --git a/Lib/test/test_pyrepl.py b/Lib/test/test_pyrepl.py index 2b217abdf1543b..3df76e02b231dc 100644 --- a/Lib/test/test_pyrepl.py +++ b/Lib/test/test_pyrepl.py @@ -15,9 +15,9 @@ # Optionally test pyrepl. This currently requires that the # 'curses' resource be given on the regrtest command line using the -u # option. Additionally, we need to attempt to import curses and readline. -requires('curses') -curses = import_module('curses') -readline = import_module('readline') +requires("curses") +curses = import_module("curses") +readline = import_module("readline") from _pyrepl.console import Console, Event from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig From 8419f01673858d5002747c9393d4ed0ec22fdb47 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 6 May 2024 15:35:06 -0700 Subject: [PATCH 05/15] gh-118647: Add defaults to typing.Generator and typing.AsyncGenerator (#118648) Co-authored-by: Alex Waygood Co-authored-by: Nikita Sobolev --- Doc/library/typing.rst | 26 ++++++++++++++++--- Lib/test/test_typing.py | 11 ++++++++ Lib/typing.py | 21 +++++++++++---- ...-05-06-08-23-01.gh-issue-118648.OVA3jJ.rst | 2 ++ 4 files changed, 51 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-06-08-23-01.gh-issue-118648.OVA3jJ.rst diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index e06287250641e6..652a3f1f70519c 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -3648,8 +3648,14 @@ Aliases to asynchronous ABCs in :mod:`collections.abc` is no ``ReturnType`` type parameter. As with :class:`Generator`, the ``SendType`` behaves contravariantly. - If your generator will only yield values, set the ``SendType`` to - ``None``:: + The ``SendType`` defaults to :const:`!None`:: + + async def infinite_stream(start: int) -> AsyncGenerator[int]: + while True: + yield start + start = await increment(start) + + It is also possible to set this type explicitly:: async def infinite_stream(start: int) -> AsyncGenerator[int, None]: while True: @@ -3671,6 +3677,9 @@ Aliases to asynchronous ABCs in :mod:`collections.abc` now supports subscripting (``[]``). See :pep:`585` and :ref:`types-genericalias`. + .. versionchanged:: 3.13 + The ``SendType`` parameter now has a default. + .. class:: AsyncIterable(Generic[T_co]) Deprecated alias to :class:`collections.abc.AsyncIterable`. @@ -3754,8 +3763,14 @@ Aliases to other ABCs in :mod:`collections.abc` of :class:`Generator` behaves contravariantly, not covariantly or invariantly. - If your generator will only yield values, set the ``SendType`` and - ``ReturnType`` to ``None``:: + The ``SendType`` and ``ReturnType`` parameters default to :const:`!None`:: + + def infinite_stream(start: int) -> Generator[int]: + while True: + yield start + start += 1 + + It is also possible to set these types explicitly:: def infinite_stream(start: int) -> Generator[int, None, None]: while True: @@ -3774,6 +3789,9 @@ Aliases to other ABCs in :mod:`collections.abc` :class:`collections.abc.Generator` now supports subscripting (``[]``). See :pep:`585` and :ref:`types-genericalias`. + .. versionchanged:: 3.13 + Default values for the send and return types were added. + .. class:: Hashable Deprecated alias to :class:`collections.abc.Hashable`. diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 112db03ae87887..8f0be1fd3f55e5 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -7289,6 +7289,17 @@ def foo(): g = foo() self.assertIsSubclass(type(g), typing.Generator) + def test_generator_default(self): + g1 = typing.Generator[int] + g2 = typing.Generator[int, None, None] + self.assertEqual(get_args(g1), (int, type(None), type(None))) + self.assertEqual(get_args(g1), get_args(g2)) + + g3 = typing.Generator[int, float] + g4 = typing.Generator[int, float, None] + self.assertEqual(get_args(g3), (int, float, type(None))) + self.assertEqual(get_args(g3), get_args(g4)) + def test_no_generator_instantiation(self): with self.assertRaises(TypeError): typing.Generator() diff --git a/Lib/typing.py b/Lib/typing.py index ff0e9b811a5867..c159fcfda68ee8 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -1328,7 +1328,7 @@ def __getattr__(self, attr): raise AttributeError(attr) def __setattr__(self, attr, val): - if _is_dunder(attr) or attr in {'_name', '_inst', '_nparams'}: + if _is_dunder(attr) or attr in {'_name', '_inst', '_nparams', '_defaults'}: super().__setattr__(attr, val) else: setattr(self.__origin__, attr, val) @@ -1578,11 +1578,12 @@ def __iter__(self): # parameters are accepted (needs custom __getitem__). class _SpecialGenericAlias(_NotIterable, _BaseGenericAlias, _root=True): - def __init__(self, origin, nparams, *, inst=True, name=None): + def __init__(self, origin, nparams, *, inst=True, name=None, defaults=()): if name is None: name = origin.__name__ super().__init__(origin, inst=inst, name=name) self._nparams = nparams + self._defaults = defaults if origin.__module__ == 'builtins': self.__doc__ = f'A generic version of {origin.__qualname__}.' else: @@ -1594,12 +1595,22 @@ def __getitem__(self, params): params = (params,) msg = "Parameters to generic types must be types." params = tuple(_type_check(p, msg) for p in params) + if (self._defaults + and len(params) < self._nparams + and len(params) + len(self._defaults) >= self._nparams + ): + params = (*params, *self._defaults[len(params) - self._nparams:]) actual_len = len(params) + if actual_len != self._nparams: + if self._defaults: + expected = f"at least {self._nparams - len(self._defaults)}" + else: + expected = str(self._nparams) if not self._nparams: raise TypeError(f"{self} is not a generic class") raise TypeError(f"Too {'many' if actual_len > self._nparams else 'few'} arguments for {self};" - f" actual {actual_len}, expected {self._nparams}") + f" actual {actual_len}, expected {expected}") return self.copy_with(params) def copy_with(self, params): @@ -2813,8 +2824,8 @@ class Other(Leaf): # Error reported by type checker OrderedDict = _alias(collections.OrderedDict, 2) Counter = _alias(collections.Counter, 1) ChainMap = _alias(collections.ChainMap, 2) -Generator = _alias(collections.abc.Generator, 3) -AsyncGenerator = _alias(collections.abc.AsyncGenerator, 2) +Generator = _alias(collections.abc.Generator, 3, defaults=(types.NoneType, types.NoneType)) +AsyncGenerator = _alias(collections.abc.AsyncGenerator, 2, defaults=(types.NoneType,)) Type = _alias(type, 1, inst=False, name='Type') Type.__doc__ = \ """Deprecated alias to builtins.type. diff --git a/Misc/NEWS.d/next/Library/2024-05-06-08-23-01.gh-issue-118648.OVA3jJ.rst b/Misc/NEWS.d/next/Library/2024-05-06-08-23-01.gh-issue-118648.OVA3jJ.rst new file mode 100644 index 00000000000000..7695fb0ba20df8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-06-08-23-01.gh-issue-118648.OVA3jJ.rst @@ -0,0 +1,2 @@ +Add type parameter defaults to :class:`typing.Generator` and +:class:`typing.AsyncGenerator`. From 5a9eeafa055a8724b1741316cfbd9612825220aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Tue, 7 May 2024 00:49:45 +0200 Subject: [PATCH 06/15] Use Intel with 12 cores for free-threading tests for maximum speedup (#118677) --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7d9e70a68005e3..8f966591c30235 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -218,8 +218,8 @@ jobs: with: config_hash: ${{ needs.check_source.outputs.config_hash }} free-threading: true - # macos-14 is M1 - os-matrix: '["macos-14"]' + # macos-14-large is Intel with 12 cores (most parallelism) + os-matrix: '["macos-14-large"]' build_ubuntu: name: 'Ubuntu' From 040571f258d13a807f5c8e4ce0a182d5f9a2e81b Mon Sep 17 00:00:00 2001 From: denballakh <47365157+denballakh@users.noreply.github.com> Date: Tue, 7 May 2024 01:56:28 +0300 Subject: [PATCH 07/15] fix typo in `_pyrepl.pager`: `plainpager` -> `plain_pager` (#118675) --- Lib/_pyrepl/pager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pyrepl/pager.py b/Lib/_pyrepl/pager.py index ecf5ddc79a5efa..af0409c4523bc2 100644 --- a/Lib/_pyrepl/pager.py +++ b/Lib/_pyrepl/pager.py @@ -23,7 +23,7 @@ def get_pager() -> Pager: if not sys.stdin.isatty() or not sys.stdout.isatty(): return plain_pager if sys.platform == "emscripten": - return plainpager + return plain_pager use_pager = os.environ.get('MANPAGER') or os.environ.get('PAGER') if use_pager: if sys.platform == 'win32': # pipes completely broken in Windows From e0422198fb4de0a5d81edd3de0d0ed32c119e9bb Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 6 May 2024 15:57:27 -0700 Subject: [PATCH 08/15] gh-117486: Improve behavior for user-defined AST subclasses (#118212) Now, such classes will no longer require changes in Python 3.13 in the normal case. The test suite for robotframework passes with no DeprecationWarnings under this PR. I also added a new DeprecationWarning for the case where `_field_types` exists but is incomplete, since that seems likely to indicate a user mistake. --- Doc/library/ast.rst | 14 ++++++- Doc/whatsnew/3.13.rst | 6 +++ Lib/test/test_ast.py | 41 +++++++++++++++++-- ...-04-23-21-17-00.gh-issue-117486.ea3KYD.rst | 4 ++ Parser/asdl_c.py | 31 +++++++------- Python/Python-ast.c | 31 +++++++------- 6 files changed, 94 insertions(+), 33 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-23-21-17-00.gh-issue-117486.ea3KYD.rst diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index e954c38c7c55b5..02dc7c86082502 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -61,7 +61,7 @@ Node classes .. attribute:: _fields - Each concrete class has an attribute :attr:`_fields` which gives the names + Each concrete class has an attribute :attr:`!_fields` which gives the names of all child nodes. Each instance of a concrete class has one attribute for each child node, @@ -74,6 +74,18 @@ Node classes as Python lists. All possible attributes must be present and have valid values when compiling an AST with :func:`compile`. + .. attribute:: _field_types + + The :attr:`!_field_types` attribute on each concrete class is a dictionary + mapping field names (as also listed in :attr:`_fields`) to their types. + + .. doctest:: + + >>> ast.TypeVar._field_types + {'name': , 'bound': ast.expr | None, 'default_value': ast.expr | None} + + .. versionadded:: 3.13 + .. attribute:: lineno col_offset end_lineno diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 43934baeb33b8a..c82d8bdb7342f4 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -384,6 +384,12 @@ ast argument that does not map to a field on the AST node is now deprecated, and will raise an exception in Python 3.15. + These changes do not apply to user-defined subclasses of :class:`ast.AST`, + unless the class opts in to the new behavior by setting the attribute + :attr:`ast.AST._field_types`. + + (Contributed by Jelle Zijlstra in :gh:`105858` and :gh:`117486`.) + * :func:`ast.parse` now accepts an optional argument *optimize* which is passed on to the :func:`compile` built-in. This makes it possible to obtain an optimized AST. diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 6d05c8f8f47c50..f6e22d44406d9e 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -3036,7 +3036,7 @@ def test_FunctionDef(self): self.assertEqual(node.name, 'foo') self.assertEqual(node.decorator_list, []) - def test_custom_subclass(self): + def test_custom_subclass_with_no_fields(self): class NoInit(ast.AST): pass @@ -3044,17 +3044,17 @@ class NoInit(ast.AST): self.assertIsInstance(obj, NoInit) self.assertEqual(obj.__dict__, {}) + def test_fields_but_no_field_types(self): class Fields(ast.AST): _fields = ('a',) - with self.assertWarnsRegex(DeprecationWarning, - r"Fields provides _fields but not _field_types."): - obj = Fields() + obj = Fields() with self.assertRaises(AttributeError): obj.a obj = Fields(a=1) self.assertEqual(obj.a, 1) + def test_fields_and_types(self): class FieldsAndTypes(ast.AST): _fields = ('a',) _field_types = {'a': int | None} @@ -3065,6 +3065,7 @@ class FieldsAndTypes(ast.AST): obj = FieldsAndTypes(a=1) self.assertEqual(obj.a, 1) + def test_fields_and_types_no_default(self): class FieldsAndTypesNoDefault(ast.AST): _fields = ('a',) _field_types = {'a': int} @@ -3077,6 +3078,38 @@ class FieldsAndTypesNoDefault(ast.AST): obj = FieldsAndTypesNoDefault(a=1) self.assertEqual(obj.a, 1) + def test_incomplete_field_types(self): + class MoreFieldsThanTypes(ast.AST): + _fields = ('a', 'b') + _field_types = {'a': int | None} + a: int | None = None + b: int | None = None + + with self.assertWarnsRegex( + DeprecationWarning, + r"Field 'b' is missing from MoreFieldsThanTypes\._field_types" + ): + obj = MoreFieldsThanTypes() + self.assertIs(obj.a, None) + self.assertIs(obj.b, None) + + obj = MoreFieldsThanTypes(a=1, b=2) + self.assertEqual(obj.a, 1) + self.assertEqual(obj.b, 2) + + def test_complete_field_types(self): + class _AllFieldTypes(ast.AST): + _fields = ('a', 'b') + _field_types = {'a': int | None, 'b': list[str]} + # This must be set explicitly + a: int | None = None + # This will add an implicit empty list default + b: list[str] + + obj = _AllFieldTypes() + self.assertIs(obj.a, None) + self.assertEqual(obj.b, []) + @support.cpython_only class ModuleStateTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-04-23-21-17-00.gh-issue-117486.ea3KYD.rst b/Misc/NEWS.d/next/Library/2024-04-23-21-17-00.gh-issue-117486.ea3KYD.rst new file mode 100644 index 00000000000000..f02d895161cfdb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-23-21-17-00.gh-issue-117486.ea3KYD.rst @@ -0,0 +1,4 @@ +Improve the behavior of user-defined subclasses of :class:`ast.AST`. Such +classes will now require no changes in the usual case to conform with the +behavior changes of the :mod:`ast` module in Python 3.13. Patch by Jelle +Zijlstra. diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 1f0be456655b25..11d59faeb0d42c 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -979,14 +979,9 @@ def visitModule(self, mod): goto cleanup; } if (field_types == NULL) { - if (PyErr_WarnFormat( - PyExc_DeprecationWarning, 1, - "%.400s provides _fields but not _field_types. " - "This will become an error in Python 3.15.", - Py_TYPE(self)->tp_name - ) < 0) { - res = -1; - } + // Probably a user-defined subclass of AST that lacks _field_types. + // This will continue to work as it did before 3.13; i.e., attributes + // that are not passed in simply do not exist on the instance. goto cleanup; } remaining_list = PySequence_List(remaining_fields); @@ -997,12 +992,21 @@ def visitModule(self, mod): PyObject *name = PyList_GET_ITEM(remaining_list, i); PyObject *type = PyDict_GetItemWithError(field_types, name); if (!type) { - if (!PyErr_Occurred()) { - PyErr_SetObject(PyExc_KeyError, name); + if (PyErr_Occurred()) { + goto set_remaining_cleanup; + } + else { + if (PyErr_WarnFormat( + PyExc_DeprecationWarning, 1, + "Field '%U' is missing from %.400s._field_types. " + "This will become an error in Python 3.15.", + name, Py_TYPE(self)->tp_name + ) < 0) { + goto set_remaining_cleanup; + } } - goto set_remaining_cleanup; } - if (_PyUnion_Check(type)) { + else if (_PyUnion_Check(type)) { // optional field // do nothing, we'll have set a None default on the class } @@ -1026,8 +1030,7 @@ def visitModule(self, mod): "This will become an error in Python 3.15.", Py_TYPE(self)->tp_name, name ) < 0) { - res = -1; - goto cleanup; + goto set_remaining_cleanup; } } } diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 1953142f6def44..4956d04f719de9 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -5178,14 +5178,9 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw) goto cleanup; } if (field_types == NULL) { - if (PyErr_WarnFormat( - PyExc_DeprecationWarning, 1, - "%.400s provides _fields but not _field_types. " - "This will become an error in Python 3.15.", - Py_TYPE(self)->tp_name - ) < 0) { - res = -1; - } + // Probably a user-defined subclass of AST that lacks _field_types. + // This will continue to work as it did before 3.13; i.e., attributes + // that are not passed in simply do not exist on the instance. goto cleanup; } remaining_list = PySequence_List(remaining_fields); @@ -5196,12 +5191,21 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw) PyObject *name = PyList_GET_ITEM(remaining_list, i); PyObject *type = PyDict_GetItemWithError(field_types, name); if (!type) { - if (!PyErr_Occurred()) { - PyErr_SetObject(PyExc_KeyError, name); + if (PyErr_Occurred()) { + goto set_remaining_cleanup; + } + else { + if (PyErr_WarnFormat( + PyExc_DeprecationWarning, 1, + "Field '%U' is missing from %.400s._field_types. " + "This will become an error in Python 3.15.", + name, Py_TYPE(self)->tp_name + ) < 0) { + goto set_remaining_cleanup; + } } - goto set_remaining_cleanup; } - if (_PyUnion_Check(type)) { + else if (_PyUnion_Check(type)) { // optional field // do nothing, we'll have set a None default on the class } @@ -5225,8 +5229,7 @@ ast_type_init(PyObject *self, PyObject *args, PyObject *kw) "This will become an error in Python 3.15.", Py_TYPE(self)->tp_name, name ) < 0) { - res = -1; - goto cleanup; + goto set_remaining_cleanup; } } } From 430945db4c5589197c6d18462e89ca5ae9e38ba6 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 6 May 2024 16:21:39 -0700 Subject: [PATCH 09/15] GH-118251: Bump the JIT CI timeout to 90 minutes (#118661) --- .github/workflows/jit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index f68567c8a3153e..7152cde8f4607c 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -23,7 +23,7 @@ jobs: jit: name: ${{ matrix.target }} (${{ matrix.debug && 'Debug' || 'Release' }}) runs-on: ${{ matrix.runner }} - timeout-minutes: 75 + timeout-minutes: 90 strategy: fail-fast: false matrix: From 636b8d94c91324c7e49a7cfe914f162690adf488 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 6 May 2024 16:31:09 -0700 Subject: [PATCH 10/15] gh-112075: Fix race in constructing dict for instance (#118499) --- Include/internal/pycore_dict.h | 2 +- Lib/test/test_free_threading/test_dict.py | 141 ++++++++++++++++++++++ Objects/dictobject.c | 140 ++++++++++----------- Objects/object.c | 4 +- 4 files changed, 216 insertions(+), 71 deletions(-) create mode 100644 Lib/test/test_free_threading/test_dict.py diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 3ba8ee74b4df87..cb7d4c3219a9af 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -105,10 +105,10 @@ PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObjec /* Consumes references to key and value */ PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); -extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value); extern int _PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value); extern int _PyDict_GetItemRef_Unicode_LockHeld(PyDictObject *op, PyObject *key, PyObject **result); extern int _PyDict_GetItemRef_KnownHash(PyDictObject *op, PyObject *key, Py_hash_t hash, PyObject **result); +extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject *obj, PyObject **dictptr, PyObject *name, PyObject *value); extern int _PyDict_Pop_KnownHash( PyDictObject *dict, diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py new file mode 100644 index 00000000000000..6a909dd3ee025f --- /dev/null +++ b/Lib/test/test_free_threading/test_dict.py @@ -0,0 +1,141 @@ +import gc +import time +import unittest +import weakref + +from ast import Or +from functools import partial +from threading import Thread +from unittest import TestCase + +from test.support import threading_helper + + +@threading_helper.requires_working_threading() +class TestDict(TestCase): + def test_racing_creation_shared_keys(self): + """Verify that creating dictionaries is thread safe when we + have a type with shared keys""" + class C(int): + pass + + self.racing_creation(C) + + def test_racing_creation_no_shared_keys(self): + """Verify that creating dictionaries is thread safe when we + have a type with an ordinary dict""" + self.racing_creation(Or) + + def test_racing_creation_inline_values_invalid(self): + """Verify that re-creating a dict after we have invalid inline values + is thread safe""" + class C: + pass + + def make_obj(): + a = C() + # Make object, make inline values invalid, and then delete dict + a.__dict__ = {} + del a.__dict__ + return a + + self.racing_creation(make_obj) + + def test_racing_creation_nonmanaged_dict(self): + """Verify that explicit creation of an unmanaged dict is thread safe + outside of the normal attribute setting code path""" + def make_obj(): + def f(): pass + return f + + def set(func, name, val): + # Force creation of the dict via PyObject_GenericGetDict + func.__dict__[name] = val + + self.racing_creation(make_obj, set) + + def racing_creation(self, cls, set=setattr): + objects = [] + processed = [] + + OBJECT_COUNT = 100 + THREAD_COUNT = 10 + CUR = 0 + + for i in range(OBJECT_COUNT): + objects.append(cls()) + + def writer_func(name): + last = -1 + while True: + if CUR == last: + continue + elif CUR == OBJECT_COUNT: + break + + obj = objects[CUR] + set(obj, name, name) + last = CUR + processed.append(name) + + writers = [] + for x in range(THREAD_COUNT): + writer = Thread(target=partial(writer_func, f"a{x:02}")) + writers.append(writer) + writer.start() + + for i in range(OBJECT_COUNT): + CUR = i + while len(processed) != THREAD_COUNT: + time.sleep(0.001) + processed.clear() + + CUR = OBJECT_COUNT + + for writer in writers: + writer.join() + + for obj_idx, obj in enumerate(objects): + assert ( + len(obj.__dict__) == THREAD_COUNT + ), f"{len(obj.__dict__)} {obj.__dict__!r} {obj_idx}" + for i in range(THREAD_COUNT): + assert f"a{i:02}" in obj.__dict__, f"a{i:02} missing at {obj_idx}" + + def test_racing_set_dict(self): + """Races assigning to __dict__ should be thread safe""" + + def f(): pass + l = [] + THREAD_COUNT = 10 + class MyDict(dict): pass + + def writer_func(l): + for i in range(1000): + d = MyDict() + l.append(weakref.ref(d)) + f.__dict__ = d + + lists = [] + writers = [] + for x in range(THREAD_COUNT): + thread_list = [] + lists.append(thread_list) + writer = Thread(target=partial(writer_func, thread_list)) + writers.append(writer) + + for writer in writers: + writer.start() + + for writer in writers: + writer.join() + + f.__dict__ = {} + gc.collect() + + for thread_list in lists: + for ref in thread_list: + self.assertIsNone(ref()) + +if __name__ == "__main__": + unittest.main() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3e662e09ea598e..b0fce09d7940e0 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -924,16 +924,15 @@ new_dict(PyInterpreterState *interp, return (PyObject *)mp; } -/* Consumes a reference to the keys object */ static PyObject * new_dict_with_shared_keys(PyInterpreterState *interp, PyDictKeysObject *keys) { size_t size = shared_keys_usable_size(keys); PyDictValues *values = new_values(size); if (values == NULL) { - dictkeys_decref(interp, keys, false); return PyErr_NoMemory(); } + dictkeys_incref(keys); for (size_t i = 0; i < size; i++) { values->values[i] = NULL; } @@ -6693,8 +6692,6 @@ materialize_managed_dict_lock_held(PyObject *obj) { _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); - OBJECT_STAT_INC(dict_materialized_on_request); - PyDictValues *values = _PyObject_InlineValues(obj); PyInterpreterState *interp = _PyInterpreterState_GET(); PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj)); @@ -7186,35 +7183,77 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) return 0; } -PyObject * -PyObject_GenericGetDict(PyObject *obj, void *context) +static inline PyObject * +ensure_managed_dict(PyObject *obj) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyTypeObject *tp = Py_TYPE(obj); - PyDictObject *dict; - if (_PyType_HasFeature(tp, Py_TPFLAGS_MANAGED_DICT)) { - dict = _PyObject_GetManagedDict(obj); - if (dict == NULL && - (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && + PyDictObject *dict = _PyObject_GetManagedDict(obj); + if (dict == NULL) { + PyTypeObject *tp = Py_TYPE(obj); + if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) && FT_ATOMIC_LOAD_UINT8(_PyObject_InlineValues(obj)->valid)) { dict = _PyObject_MaterializeManagedDict(obj); } - else if (dict == NULL) { - Py_BEGIN_CRITICAL_SECTION(obj); - + else { +#ifdef Py_GIL_DISABLED // Check again that we're not racing with someone else creating the dict + Py_BEGIN_CRITICAL_SECTION(obj); dict = _PyObject_GetManagedDict(obj); - if (dict == NULL) { - OBJECT_STAT_INC(dict_materialized_on_request); - dictkeys_incref(CACHED_KEYS(tp)); - dict = (PyDictObject *)new_dict_with_shared_keys(interp, CACHED_KEYS(tp)); - FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, - (PyDictObject *)dict); + if (dict != NULL) { + goto done; } +#endif + dict = (PyDictObject *)new_dict_with_shared_keys(_PyInterpreterState_GET(), + CACHED_KEYS(tp)); + FT_ATOMIC_STORE_PTR_RELEASE(_PyObject_ManagedDictPointer(obj)->dict, + (PyDictObject *)dict); +#ifdef Py_GIL_DISABLED +done: Py_END_CRITICAL_SECTION(); +#endif } - return Py_XNewRef((PyObject *)dict); + } + return (PyObject *)dict; +} + +static inline PyObject * +ensure_nonmanaged_dict(PyObject *obj, PyObject **dictptr) +{ + PyDictKeysObject *cached; + + PyObject *dict = FT_ATOMIC_LOAD_PTR_ACQUIRE(*dictptr); + if (dict == NULL) { +#ifdef Py_GIL_DISABLED + Py_BEGIN_CRITICAL_SECTION(obj); + dict = *dictptr; + if (dict != NULL) { + goto done; + } +#endif + PyTypeObject *tp = Py_TYPE(obj); + if (_PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)); + dict = new_dict_with_shared_keys(interp, cached); + } + else { + dict = PyDict_New(); + } + FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, dict); +#ifdef Py_GIL_DISABLED +done: + Py_END_CRITICAL_SECTION(); +#endif + } + return dict; +} + +PyObject * +PyObject_GenericGetDict(PyObject *obj, void *context) +{ + PyTypeObject *tp = Py_TYPE(obj); + if (_PyType_HasFeature(tp, Py_TPFLAGS_MANAGED_DICT)) { + return Py_XNewRef(ensure_managed_dict(obj)); } else { PyObject **dictptr = _PyObject_ComputedDictPointer(obj); @@ -7223,65 +7262,28 @@ PyObject_GenericGetDict(PyObject *obj, void *context) "This object has no __dict__"); return NULL; } - PyObject *dict = *dictptr; - if (dict == NULL) { - PyTypeObject *tp = Py_TYPE(obj); - if (_PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) { - dictkeys_incref(CACHED_KEYS(tp)); - *dictptr = dict = new_dict_with_shared_keys( - interp, CACHED_KEYS(tp)); - } - else { - *dictptr = dict = PyDict_New(); - } - } - return Py_XNewRef(dict); + + return Py_XNewRef(ensure_nonmanaged_dict(obj, dictptr)); } } int -_PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, +_PyObjectDict_SetItem(PyTypeObject *tp, PyObject *obj, PyObject **dictptr, PyObject *key, PyObject *value) { PyObject *dict; int res; - PyDictKeysObject *cached; - PyInterpreterState *interp = _PyInterpreterState_GET(); assert(dictptr != NULL); - if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) { - assert(dictptr != NULL); - dict = *dictptr; - if (dict == NULL) { - assert(!_PyType_HasFeature(tp, Py_TPFLAGS_INLINE_VALUES)); - dictkeys_incref(cached); - dict = new_dict_with_shared_keys(interp, cached); - if (dict == NULL) - return -1; - *dictptr = dict; - } - if (value == NULL) { - res = PyDict_DelItem(dict, key); - } - else { - res = PyDict_SetItem(dict, key, value); - } - } else { - dict = *dictptr; - if (dict == NULL) { - dict = PyDict_New(); - if (dict == NULL) - return -1; - *dictptr = dict; - } - if (value == NULL) { - res = PyDict_DelItem(dict, key); - } else { - res = PyDict_SetItem(dict, key, value); - } + dict = ensure_nonmanaged_dict(obj, dictptr); + if (dict == NULL) { + return -1; } + Py_BEGIN_CRITICAL_SECTION(dict); + res = _PyDict_SetItem_LockHeld((PyDictObject *)dict, key, value); ASSERT_CONSISTENT(dict); + Py_END_CRITICAL_SECTION(); return res; } diff --git a/Objects/object.c b/Objects/object.c index effbd51991eaa5..8ad0389cbc7626 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1731,7 +1731,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, goto done; } else { - res = _PyObjectDict_SetItem(tp, dictptr, name, value); + res = _PyObjectDict_SetItem(tp, obj, dictptr, name, value); } } else { @@ -1789,7 +1789,9 @@ PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context) "not a '%.200s'", Py_TYPE(value)->tp_name); return -1; } + Py_BEGIN_CRITICAL_SECTION(obj); Py_XSETREF(*dictptr, Py_NewRef(value)); + Py_END_CRITICAL_SECTION(); return 0; } From e272195b3eff3a78e334a601a637d198b8de2319 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 6 May 2024 16:45:04 -0700 Subject: [PATCH 11/15] gh-118362: Skip tests when threading isn't available (#118666) * Skip tests when threads aren't available * Use ThreadPoolExecutor --- Lib/test/test_free_threading/test_list.py | 4 ++-- Lib/test/test_free_threading/test_monitoring.py | 12 ++++++------ Lib/test/test_free_threading/test_type.py | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Lib/test/test_free_threading/test_list.py b/Lib/test/test_free_threading/test_list.py index 79cb0a93092365..6ad806d67a80ed 100644 --- a/Lib/test/test_free_threading/test_list.py +++ b/Lib/test/test_free_threading/test_list.py @@ -3,7 +3,7 @@ from threading import Thread from unittest import TestCase -from test.support import is_wasi +from test.support import threading_helper class C: @@ -11,7 +11,7 @@ def __init__(self, v): self.v = v -@unittest.skipIf(is_wasi, "WASI has no threads.") +@threading_helper.requires_working_threading() class TestList(TestCase): def test_racing_iter_append(self): diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index fa588046bf9923..3a3f1ba3b605d6 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -7,7 +7,7 @@ import weakref from sys import monitoring -from test.support import is_wasi +from test.support import threading_helper from threading import Thread, _PyRLock from unittest import TestCase @@ -87,7 +87,7 @@ def tearDown(self): monitoring.free_tool_id(self.tool_id) -@unittest.skipIf(is_wasi, "WASI has no threads.") +@threading_helper.requires_working_threading() class SetPreTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): """Sets tracing one time after the threads have started""" @@ -106,7 +106,7 @@ def after_threads(self): sys.settrace(self.trace_func) -@unittest.skipIf(is_wasi, "WASI has no threads.") +@threading_helper.requires_working_threading() class MonitoringMultiThreaded( MonitoringTestMixin, InstrumentationMultiThreadedMixin, TestCase ): @@ -140,7 +140,7 @@ def during_threads(self): self.set = not self.set -@unittest.skipIf(is_wasi, "WASI has no threads.") +@threading_helper.requires_working_threading() class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): """Uses sys.settrace and repeatedly toggles instrumentation on and off""" @@ -166,7 +166,7 @@ def during_threads(self): self.set = not self.set -@unittest.skipIf(is_wasi, "WASI has no threads.") +@threading_helper.requires_working_threading() class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): """Uses sys.setprofile and repeatedly toggles instrumentation on and off""" @@ -192,7 +192,7 @@ def during_threads(self): self.set = not self.set -@unittest.skipIf(is_wasi, "WASI has no threads.") +@threading_helper.requires_working_threading() class MonitoringMisc(MonitoringTestMixin, TestCase): def register_callback(self): def callback(*args): diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 76208c42fc8aea..6eead198deed46 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -1,10 +1,11 @@ import unittest +from concurrent.futures import ThreadPoolExecutor from threading import Thread -from multiprocessing.dummy import Pool from unittest import TestCase -from test.support import is_wasi +from test.support import threading_helper, import_helper + NTHREADS = 6 @@ -15,7 +16,7 @@ class A: attr = 1 -@unittest.skipIf(is_wasi, "WASI has no threads.") +@threading_helper.requires_working_threading() class TestType(TestCase): def test_attr_cache(self): def read(id0): @@ -34,11 +35,10 @@ def write(id0): A.attr = x - with Pool(NTHREADS) as pool: - pool.apply_async(read, (1,)) - pool.apply_async(write, (1,)) - pool.close() - pool.join() + with ThreadPoolExecutor(NTHREADS) as pool: + pool.submit(read, (1,)) + pool.submit(write, (1,)) + pool.shutdown(wait=True) def test_attr_cache_consistency(self): class C: From 8d8275b0cf43f0e20c72a9641cbddc5044cdae04 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" <69878+youknowone@users.noreply.github.com> Date: Tue, 7 May 2024 09:02:52 +0900 Subject: [PATCH 12/15] gh-118473: Fix set_asyncgen_hooks not to be partially set when arguments are invalid (#118474) --- Lib/test/test_sys.py | 16 ++++++++++- ...-05-01-22-43-54.gh-issue-118473.QIvq9R.rst | 1 + Python/sysmodule.c | 28 +++++++++++++------ 3 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-01-22-43-54.gh-issue-118473.QIvq9R.rst diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 944e84e88c8a35..ee3bd0092f9bf3 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1788,6 +1788,21 @@ def test_asyncgen_hooks(self): self.assertIsNone(old.finalizer) firstiter = lambda *a: None + finalizer = lambda *a: None + + with self.assertRaises(TypeError): + sys.set_asyncgen_hooks(firstiter=firstiter, finalizer="invalid") + cur = sys.get_asyncgen_hooks() + self.assertIsNone(cur.firstiter) + self.assertIsNone(cur.finalizer) + + # gh-118473 + with self.assertRaises(TypeError): + sys.set_asyncgen_hooks(firstiter="invalid", finalizer=finalizer) + cur = sys.get_asyncgen_hooks() + self.assertIsNone(cur.firstiter) + self.assertIsNone(cur.finalizer) + sys.set_asyncgen_hooks(firstiter=firstiter) hooks = sys.get_asyncgen_hooks() self.assertIs(hooks.firstiter, firstiter) @@ -1795,7 +1810,6 @@ def test_asyncgen_hooks(self): self.assertIs(hooks.finalizer, None) self.assertIs(hooks[1], None) - finalizer = lambda *a: None sys.set_asyncgen_hooks(finalizer=finalizer) hooks = sys.get_asyncgen_hooks() self.assertIs(hooks.firstiter, firstiter) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-01-22-43-54.gh-issue-118473.QIvq9R.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-01-22-43-54.gh-issue-118473.QIvq9R.rst new file mode 100644 index 00000000000000..9d65e3c403ff88 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-01-22-43-54.gh-issue-118473.QIvq9R.rst @@ -0,0 +1 @@ +Fix :func:`sys.set_asyncgen_hooks` not to be partially set when raising :exc:`TypeError`. diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 417040c891f803..2851247628c32a 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1399,12 +1399,6 @@ sys_set_asyncgen_hooks(PyObject *self, PyObject *args, PyObject *kw) Py_TYPE(finalizer)->tp_name); return NULL; } - if (_PyEval_SetAsyncGenFinalizer(finalizer) < 0) { - return NULL; - } - } - else if (finalizer == Py_None && _PyEval_SetAsyncGenFinalizer(NULL) < 0) { - return NULL; } if (firstiter && firstiter != Py_None) { @@ -1414,15 +1408,33 @@ sys_set_asyncgen_hooks(PyObject *self, PyObject *args, PyObject *kw) Py_TYPE(firstiter)->tp_name); return NULL; } - if (_PyEval_SetAsyncGenFirstiter(firstiter) < 0) { + } + + PyObject *cur_finalizer = _PyEval_GetAsyncGenFinalizer(); + + if (finalizer && finalizer != Py_None) { + if (_PyEval_SetAsyncGenFinalizer(finalizer) < 0) { return NULL; } } - else if (firstiter == Py_None && _PyEval_SetAsyncGenFirstiter(NULL) < 0) { + else if (finalizer == Py_None && _PyEval_SetAsyncGenFinalizer(NULL) < 0) { return NULL; } + if (firstiter && firstiter != Py_None) { + if (_PyEval_SetAsyncGenFirstiter(firstiter) < 0) { + goto error; + } + } + else if (firstiter == Py_None && _PyEval_SetAsyncGenFirstiter(NULL) < 0) { + goto error; + } + Py_RETURN_NONE; + +error: + _PyEval_SetAsyncGenFinalizer(cur_finalizer); + return NULL; } PyDoc_STRVAR(set_asyncgen_hooks_doc, From 723d4d2fe8e77b398f0ccffcfa541149caaac6a1 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 6 May 2024 20:12:39 -0400 Subject: [PATCH 13/15] gh-118527: Intern code consts in free-threaded build (#118667) We already intern and immortalize most string constants. In the free-threaded build, other constants can be a source of reference count contention because they are shared by all threads running the same code objects. --- Include/internal/pycore_code.h | 10 + Include/internal/pycore_interp.h | 1 + Include/internal/pycore_setobject.h | 3 + Lib/test/support/__init__.py | 9 + Lib/test/test_code.py | 23 +- Lib/test/test_ctypes/test_internals.py | 2 +- Lib/test/test_ctypes/test_python_api.py | 2 +- Lib/test/test_memoryio.py | 2 +- Modules/_testinternalcapi.c | 12 + Objects/codeobject.c | 302 ++++++++++++++++++++++-- Objects/setobject.c | 6 + Python/bltinmodule.c | 13 + Python/pylifecycle.c | 7 + Python/pystate.c | 1 + 14 files changed, 375 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 8d832c59e36874..bcbaf60f226c77 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -8,6 +8,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_lock.h" // PyMutex + // We hide some of the newer PyCodeObject fields behind macros. // This helps with backporting certain changes to 3.12. @@ -16,6 +18,14 @@ extern "C" { #define _PyCode_HAS_INSTRUMENTATION(CODE) \ (CODE->_co_instrumentation_version > 0) +struct _py_code_state { + PyMutex mutex; + // Interned constants from code objects. Used by the free-threaded build. + struct _Py_hashtable_t *constants; +}; + +extern PyStatus _PyCode_Init(PyInterpreterState *interp); +extern void _PyCode_Fini(PyInterpreterState *interp); #define CODE_MAX_WATCHERS 8 diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index a2aec6dd4b7241..86dada5061e7b5 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -245,6 +245,7 @@ struct _is { struct _Py_long_state long_state; struct _dtoa_state dtoa; struct _py_func_state func_state; + struct _py_code_state code_state; struct _Py_dict_state dict_state; struct _Py_exc_state exc_state; diff --git a/Include/internal/pycore_setobject.h b/Include/internal/pycore_setobject.h index 41b351ead25dd1..0494c07fe1869d 100644 --- a/Include/internal/pycore_setobject.h +++ b/Include/internal/pycore_setobject.h @@ -30,6 +30,9 @@ PyAPI_DATA(PyObject *) _PySet_Dummy; PyAPI_FUNC(int) _PySet_Contains(PySetObject *so, PyObject *key); +// Clears the set without acquiring locks. Used by _PyCode_Fini. +extern void _PySet_ClearInternal(PySetObject *so); + #ifdef __cplusplus } #endif diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 999fffb03ed59a..e2a854663ddb7d 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -535,6 +535,15 @@ def suppress_immortalization(suppress=True): finally: _testinternalcapi.set_immortalize_deferred(*old_values) +def skip_if_suppress_immortalization(): + try: + import _testinternalcapi + except ImportError: + return + return unittest.skipUnless(_testinternalcapi.get_immortalize_deferred(), + "requires immortalization of deferred objects") + + MS_WINDOWS = (sys.platform == 'win32') # Is not actually used in tests, but is kept for compatibility. diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index aa793f56225393..059fab1e2ecc08 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -142,7 +142,8 @@ from test.support import (cpython_only, check_impl_detail, requires_debug_ranges, gc_collect, Py_GIL_DISABLED, - suppress_immortalization) + suppress_immortalization, + skip_if_suppress_immortalization) from test.support.script_helper import assert_python_ok from test.support import threading_helper, import_helper from test.support.bytecode_helper import instructions_with_positions @@ -570,11 +571,31 @@ def f(a='str_value'): self.assertIsInterned(f()) @cpython_only + @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") def test_interned_string_with_null(self): co = compile(r'res = "str\0value!"', '?', 'exec') v = self.find_const(co.co_consts, 'str\0value!') self.assertIsNotInterned(v) + @cpython_only + @unittest.skipUnless(Py_GIL_DISABLED, "does not intern all constants") + @skip_if_suppress_immortalization() + def test_interned_constants(self): + # compile separately to avoid compile time de-duping + + globals = {} + exec(textwrap.dedent(""" + def func1(): + return (0.0, (1, 2, "hello")) + """), globals) + + exec(textwrap.dedent(""" + def func2(): + return (0.0, (1, 2, "hello")) + """), globals) + + self.assertTrue(globals["func1"]() is globals["func2"]()) + class CodeWeakRefTest(unittest.TestCase): diff --git a/Lib/test/test_ctypes/test_internals.py b/Lib/test/test_ctypes/test_internals.py index 94c9a86c2d06df..778da6573da975 100644 --- a/Lib/test/test_ctypes/test_internals.py +++ b/Lib/test/test_ctypes/test_internals.py @@ -28,7 +28,7 @@ def test_ints(self): self.assertEqual(ci._objects, None) def test_c_char_p(self): - s = b"Hello, World" + s = "Hello, World".encode("ascii") refcnt = sys.getrefcount(s) cs = c_char_p(s) self.assertEqual(refcnt + 1, sys.getrefcount(s)) diff --git a/Lib/test/test_ctypes/test_python_api.py b/Lib/test/test_ctypes/test_python_api.py index 77da35855928a4..1072a109833261 100644 --- a/Lib/test/test_ctypes/test_python_api.py +++ b/Lib/test/test_ctypes/test_python_api.py @@ -47,7 +47,7 @@ def test_PyLong_Long(self): @support.refcount_test def test_PyObj_FromPtr(self): - s = "abc def ghi jkl" + s = object() ref = sys.getrefcount(s) # id(python-object) is the address pyobj = _ctypes.PyObj_FromPtr(id(s)) diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py index 8192502a40791b..95629ed862d6eb 100644 --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -801,7 +801,7 @@ def test_sizeof(self): def _test_cow_mutation(self, mutation): # Common code for all BytesIO copy-on-write mutation tests. - imm = b' ' * 1024 + imm = (' ' * 1024).encode("ascii") old_rc = sys.getrefcount(imm) memio = self.ioclass(imm) self.assertEqual(sys.getrefcount(imm), old_rc + 1) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index de98af32b5dff7..e209c7e05264f2 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1985,6 +1985,17 @@ set_immortalize_deferred(PyObject *self, PyObject *value) #endif } +static PyObject * +get_immortalize_deferred(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = PyInterpreterState_Get(); + return PyBool_FromLong(interp->gc.immortalize.enable_on_thread_created); +#else + Py_RETURN_FALSE; +#endif +} + static PyObject * has_inline_values(PyObject *self, PyObject *obj) { @@ -2081,6 +2092,7 @@ static PyMethodDef module_functions[] = { {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif {"set_immortalize_deferred", set_immortalize_deferred, METH_VARARGS}, + {"get_immortalize_deferred", get_immortalize_deferred, METH_NOARGS}, #ifdef _Py_TIER2 {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS}, #endif diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 2ce5f7dca2eb5c..3d804f73a54088 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -5,6 +5,8 @@ #include "pycore_code.h" // _PyCodeConstructor #include "pycore_frame.h" // FRAME_SPECIALS_SIZE +#include "pycore_hashtable.h" // _Py_hashtable_t +#include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs #include "pycore_object.h" // _PyObject_SetDeferredRefcount #include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches @@ -100,10 +102,20 @@ PyCode_ClearWatcher(int watcher_id) * generic helpers ******************/ -/* all_name_chars(s): true iff s matches [a-zA-Z0-9_]* */ static int -all_name_chars(PyObject *o) +should_intern_string(PyObject *o) { +#ifdef Py_GIL_DISABLED + // The free-threaded build interns (and immortalizes) all string constants + // unless we've disabled immortalizing objects that use deferred reference + // counting. + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->gc.immortalize.enable_on_thread_created) { + return 1; + } +#endif + + // compute if s matches [a-zA-Z0-9_] const unsigned char *s, *e; if (!PyUnicode_IS_ASCII(o)) @@ -118,6 +130,10 @@ all_name_chars(PyObject *o) return 1; } +#ifdef Py_GIL_DISABLED +static PyObject *intern_one_constant(PyObject *op); +#endif + static int intern_strings(PyObject *tuple) { @@ -135,14 +151,16 @@ intern_strings(PyObject *tuple) return 0; } -/* Intern selected string constants */ +/* Intern constants. In the default build, this interns selected string + constants. In the free-threaded build, this also interns non-string + constants. */ static int -intern_string_constants(PyObject *tuple, int *modified) +intern_constants(PyObject *tuple, int *modified) { for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v)) { - if (all_name_chars(v)) { + if (should_intern_string(v)) { PyObject *w = v; PyUnicode_InternInPlace(&v); if (w != v) { @@ -154,7 +172,7 @@ intern_string_constants(PyObject *tuple, int *modified) } } else if (PyTuple_CheckExact(v)) { - if (intern_string_constants(v, NULL) < 0) { + if (intern_constants(v, NULL) < 0) { return -1; } } @@ -165,7 +183,7 @@ intern_string_constants(PyObject *tuple, int *modified) return -1; } int tmp_modified = 0; - if (intern_string_constants(tmp, &tmp_modified) < 0) { + if (intern_constants(tmp, &tmp_modified) < 0) { Py_DECREF(tmp); return -1; } @@ -184,6 +202,59 @@ intern_string_constants(PyObject *tuple, int *modified) } Py_DECREF(tmp); } +#ifdef Py_GIL_DISABLED + else if (PySlice_Check(v)) { + PySliceObject *slice = (PySliceObject *)v; + PyObject *tmp = PyTuple_New(3); + if (tmp == NULL) { + return -1; + } + PyTuple_SET_ITEM(tmp, 0, Py_NewRef(slice->start)); + PyTuple_SET_ITEM(tmp, 1, Py_NewRef(slice->stop)); + PyTuple_SET_ITEM(tmp, 2, Py_NewRef(slice->step)); + int tmp_modified = 0; + if (intern_constants(tmp, &tmp_modified) < 0) { + Py_DECREF(tmp); + return -1; + } + if (tmp_modified) { + v = PySlice_New(PyTuple_GET_ITEM(tmp, 0), + PyTuple_GET_ITEM(tmp, 1), + PyTuple_GET_ITEM(tmp, 2)); + if (v == NULL) { + Py_DECREF(tmp); + return -1; + } + PyTuple_SET_ITEM(tuple, i, v); + Py_DECREF(slice); + if (modified) { + *modified = 1; + } + } + Py_DECREF(tmp); + } + + // Intern non-string consants in the free-threaded build, but only if + // we are also immortalizing objects that use deferred reference + // counting. + PyThreadState *tstate = PyThreadState_GET(); + if (!_Py_IsImmortal(v) && !PyCode_Check(v) && + !PyUnicode_CheckExact(v) && + tstate->interp->gc.immortalize.enable_on_thread_created) + { + PyObject *interned = intern_one_constant(v); + if (interned == NULL) { + return -1; + } + else if (interned != v) { + PyTuple_SET_ITEM(tuple, i, interned); + Py_SETREF(v, interned); + if (modified) { + *modified = 1; + } + } + } +#endif } return 0; } @@ -540,18 +611,41 @@ remove_column_info(PyObject *locations) return res; } -/* The caller is responsible for ensuring that the given data is valid. */ - -PyCodeObject * -_PyCode_New(struct _PyCodeConstructor *con) +static int +intern_code_constants(struct _PyCodeConstructor *con) { +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _py_code_state *state = &interp->code_state; + PyMutex_Lock(&state->mutex); +#endif if (intern_strings(con->names) < 0) { - return NULL; + goto error; } - if (intern_string_constants(con->consts, NULL) < 0) { - return NULL; + if (intern_constants(con->consts, NULL) < 0) { + goto error; } if (intern_strings(con->localsplusnames) < 0) { + goto error; + } +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&state->mutex); +#endif + return 0; + +error: +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&state->mutex); +#endif + return -1; +} + +/* The caller is responsible for ensuring that the given data is valid. */ + +PyCodeObject * +_PyCode_New(struct _PyCodeConstructor *con) +{ + if (intern_code_constants(con) < 0) { return NULL; } @@ -2397,3 +2491,183 @@ _PyCode_ConstantKey(PyObject *op) } return key; } + +#ifdef Py_GIL_DISABLED +static PyObject * +intern_one_constant(PyObject *op) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _Py_hashtable_t *consts = interp->code_state.constants; + + assert(!PyUnicode_CheckExact(op)); // strings are interned separately + + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts, op); + if (entry == NULL) { + if (_Py_hashtable_set(consts, op, op) != 0) { + return NULL; + } + +#ifdef Py_REF_DEBUG + Py_ssize_t refcnt = Py_REFCNT(op); + if (refcnt != 1) { + // Adjust the reftotal to account for the fact that we only + // restore a single reference in _PyCode_Fini. + _Py_AddRefTotal(_PyThreadState_GET(), -(refcnt - 1)); + } +#endif + + _Py_SetImmortal(op); + return op; + } + + assert(_Py_IsImmortal(entry->value)); + return (PyObject *)entry->value; +} + +static int +compare_constants(const void *key1, const void *key2) { + PyObject *op1 = (PyObject *)key1; + PyObject *op2 = (PyObject *)key2; + if (op1 == op2) { + return 1; + } + if (Py_TYPE(op1) != Py_TYPE(op2)) { + return 0; + } + // We compare container contents by identity because we have already + // internalized the items. + if (PyTuple_CheckExact(op1)) { + Py_ssize_t size = PyTuple_GET_SIZE(op1); + if (size != PyTuple_GET_SIZE(op2)) { + return 0; + } + for (Py_ssize_t i = 0; i < size; i++) { + if (PyTuple_GET_ITEM(op1, i) != PyTuple_GET_ITEM(op2, i)) { + return 0; + } + } + return 1; + } + else if (PyFrozenSet_CheckExact(op1)) { + if (PySet_GET_SIZE(op1) != PySet_GET_SIZE(op2)) { + return 0; + } + Py_ssize_t pos1 = 0, pos2 = 0; + PyObject *obj1, *obj2; + Py_hash_t hash1, hash2; + while ((_PySet_NextEntry(op1, &pos1, &obj1, &hash1)) && + (_PySet_NextEntry(op2, &pos2, &obj2, &hash2))) + { + if (obj1 != obj2) { + return 0; + } + } + return 1; + } + else if (PySlice_Check(op1)) { + PySliceObject *s1 = (PySliceObject *)op1; + PySliceObject *s2 = (PySliceObject *)op2; + return (s1->start == s2->start && + s1->stop == s2->stop && + s1->step == s2->step); + } + else if (PyBytes_CheckExact(op1) || PyLong_CheckExact(op1)) { + return PyObject_RichCompareBool(op1, op2, Py_EQ); + } + else if (PyFloat_CheckExact(op1)) { + // Ensure that, for example, +0.0 and -0.0 are distinct + double f1 = PyFloat_AS_DOUBLE(op1); + double f2 = PyFloat_AS_DOUBLE(op2); + return memcmp(&f1, &f2, sizeof(double)) == 0; + } + else if (PyComplex_CheckExact(op1)) { + Py_complex c1 = ((PyComplexObject *)op1)->cval; + Py_complex c2 = ((PyComplexObject *)op2)->cval; + return memcmp(&c1, &c2, sizeof(Py_complex)) == 0; + } + _Py_FatalErrorFormat("unexpected type in compare_constants: %s", + Py_TYPE(op1)->tp_name); + return 0; +} + +static Py_uhash_t +hash_const(const void *key) +{ + PyObject *op = (PyObject *)key; + if (PySlice_Check(op)) { + PySliceObject *s = (PySliceObject *)op; + PyObject *data[3] = { s->start, s->stop, s->step }; + return _Py_HashBytes(&data, sizeof(data)); + } + else if (PyTuple_CheckExact(op)) { + Py_ssize_t size = PyTuple_GET_SIZE(op); + PyObject **data = _PyTuple_ITEMS(op); + return _Py_HashBytes(data, sizeof(PyObject *) * size); + } + Py_hash_t h = PyObject_Hash(op); + if (h == -1) { + // This should never happen: all the constants we support have + // infallible hash functions. + Py_FatalError("code: hash failed"); + } + return (Py_uhash_t)h; +} + +static int +clear_containers(_Py_hashtable_t *ht, const void *key, const void *value, + void *user_data) +{ + // First clear containers to avoid recursive deallocation later on in + // destroy_key. + PyObject *op = (PyObject *)key; + if (PyTuple_CheckExact(op)) { + for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(op); i++) { + Py_CLEAR(_PyTuple_ITEMS(op)[i]); + } + } + else if (PySlice_Check(op)) { + PySliceObject *slice = (PySliceObject *)op; + Py_SETREF(slice->start, Py_None); + Py_SETREF(slice->stop, Py_None); + Py_SETREF(slice->step, Py_None); + } + else if (PyFrozenSet_CheckExact(op)) { + _PySet_ClearInternal((PySetObject *)op); + } + return 0; +} + +static void +destroy_key(void *key) +{ + _Py_ClearImmortal(key); +} +#endif + +PyStatus +_PyCode_Init(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + struct _py_code_state *state = &interp->code_state; + state->constants = _Py_hashtable_new_full(&hash_const, &compare_constants, + &destroy_key, NULL, NULL); + if (state->constants == NULL) { + return _PyStatus_NO_MEMORY(); + } +#endif + return _PyStatus_OK(); +} + +void +_PyCode_Fini(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + // Free interned constants + struct _py_code_state *state = &interp->code_state; + if (state->constants) { + _Py_hashtable_foreach(state->constants, &clear_containers, NULL); + _Py_hashtable_destroy(state->constants); + state->constants = NULL; + } +#endif +} diff --git a/Objects/setobject.c b/Objects/setobject.c index 19975e3d4d18e2..68986bb6a6b557 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -2621,6 +2621,12 @@ PySet_Clear(PyObject *set) return 0; } +void +_PySet_ClearInternal(PySetObject *so) +{ + (void)set_clear_internal(so); +} + int PySet_Contains(PyObject *anyset, PyObject *key) { diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index e7b60ebca5e2d7..d192d5be751cfc 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -866,8 +866,21 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename, if (str == NULL) goto error; +#ifdef Py_GIL_DISABLED + // gh-118527: Disable immortalization of code constants for explicit + // compile() calls to get consistent frozen outputs between the default + // and free-threaded builds. + PyInterpreterState *interp = _PyInterpreterState_GET(); + int old_value = interp->gc.immortalize.enable_on_thread_created; + interp->gc.immortalize.enable_on_thread_created = 0; +#endif + result = Py_CompileStringObject(str, filename, start[compile_mode], &cf, optimize); +#ifdef Py_GIL_DISABLED + interp->gc.immortalize.enable_on_thread_created = old_value; +#endif + Py_XDECREF(source_copy); goto finally; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index f24b0482c2bc38..67bbbd01ca0c48 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -854,6 +854,11 @@ pycore_interp_init(PyThreadState *tstate) return status; } + status = _PyCode_Init(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + status = _PyDtoa_Init(interp); if (_PyStatus_EXCEPTION(status)) { return status; @@ -1827,6 +1832,8 @@ finalize_interp_types(PyInterpreterState *interp) _PyTypes_Fini(interp); + _PyCode_Fini(interp); + // Call _PyUnicode_ClearInterned() before _PyDict_Fini() since it uses // a dict internally. _PyUnicode_ClearInterned(interp); diff --git a/Python/pystate.c b/Python/pystate.c index f442d87ba3150e..7c75263b7e526a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -404,6 +404,7 @@ _Py_COMP_DIAG_POP &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \ &(runtime)->_main_interpreter.types.mutex, \ + &(runtime)->_main_interpreter.code_state.mutex, \ } static void From ff6cbb2503a8fe3fceeadd889e34fc9a8f308ecd Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 6 May 2024 17:22:26 -0700 Subject: [PATCH 14/15] gh-112075: use per-thread dict version pool (#118676) use thread state set of dict versions --- Include/cpython/pystate.h | 1 + Include/internal/pycore_dict.h | 21 +++++++++++-- Lib/test/test_free_threading/test_dict.py | 36 +++++++++++++++++++++++ Modules/_testcapi/dict.c | 14 ++++++++- Python/pystate.c | 1 + 5 files changed, 70 insertions(+), 3 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 0611e299403031..2df9ecd6d52084 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -188,6 +188,7 @@ struct _ts { PyObject *previous_executor; + uint64_t dict_global_version; }; #ifdef Py_DEBUG diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index cb7d4c3219a9af..8d8d3748edaea8 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -221,8 +221,25 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1) #ifdef Py_GIL_DISABLED -#define DICT_NEXT_VERSION(INTERP) \ - (_Py_atomic_add_uint64(&(INTERP)->dict_state.global_version, DICT_VERSION_INCREMENT) + DICT_VERSION_INCREMENT) + +#define THREAD_LOCAL_DICT_VERSION_COUNT 256 +#define THREAD_LOCAL_DICT_VERSION_BATCH THREAD_LOCAL_DICT_VERSION_COUNT * DICT_VERSION_INCREMENT + +static inline uint64_t +dict_next_version(PyInterpreterState *interp) +{ + PyThreadState *tstate = PyThreadState_GET(); + uint64_t cur_progress = (tstate->dict_global_version & + (THREAD_LOCAL_DICT_VERSION_BATCH - 1)); + if (cur_progress == 0) { + uint64_t next = _Py_atomic_add_uint64(&interp->dict_state.global_version, + THREAD_LOCAL_DICT_VERSION_BATCH); + tstate->dict_global_version = next; + } + return tstate->dict_global_version += DICT_VERSION_INCREMENT; +} + +#define DICT_NEXT_VERSION(INTERP) dict_next_version(INTERP) #else #define DICT_NEXT_VERSION(INTERP) \ diff --git a/Lib/test/test_free_threading/test_dict.py b/Lib/test/test_free_threading/test_dict.py index 6a909dd3ee025f..f877582e6b565c 100644 --- a/Lib/test/test_free_threading/test_dict.py +++ b/Lib/test/test_free_threading/test_dict.py @@ -8,6 +8,8 @@ from threading import Thread from unittest import TestCase +from _testcapi import dict_version + from test.support import threading_helper @@ -137,5 +139,39 @@ def writer_func(l): for ref in thread_list: self.assertIsNone(ref()) + def test_dict_version(self): + THREAD_COUNT = 10 + DICT_COUNT = 10000 + lists = [] + writers = [] + + def writer_func(thread_list): + for i in range(DICT_COUNT): + thread_list.append(dict_version({})) + + for x in range(THREAD_COUNT): + thread_list = [] + lists.append(thread_list) + writer = Thread(target=partial(writer_func, thread_list)) + writers.append(writer) + + for writer in writers: + writer.start() + + for writer in writers: + writer.join() + + total_len = 0 + values = set() + for thread_list in lists: + for v in thread_list: + if v in values: + print('dup', v, (v/4096)%256) + values.add(v) + total_len += len(thread_list) + versions = set(dict_version for thread_list in lists for dict_version in thread_list) + self.assertEqual(len(versions), THREAD_COUNT*DICT_COUNT) + + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testcapi/dict.c b/Modules/_testcapi/dict.c index 4319906dc4fee0..e80d898118daa5 100644 --- a/Modules/_testcapi/dict.c +++ b/Modules/_testcapi/dict.c @@ -1,7 +1,6 @@ #include "parts.h" #include "util.h" - static PyObject * dict_containsstring(PyObject *self, PyObject *args) { @@ -182,6 +181,18 @@ dict_popstring_null(PyObject *self, PyObject *args) RETURN_INT(PyDict_PopString(dict, key, NULL)); } +static PyObject * +dict_version(PyObject *self, PyObject *dict) +{ + if (!PyDict_Check(dict)) { + PyErr_SetString(PyExc_TypeError, "expected dict"); + return NULL; + } +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + return PyLong_FromUnsignedLongLong(((PyDictObject *)dict)->ma_version_tag); +_Py_COMP_DIAG_POP +} static PyMethodDef test_methods[] = { {"dict_containsstring", dict_containsstring, METH_VARARGS}, @@ -193,6 +204,7 @@ static PyMethodDef test_methods[] = { {"dict_pop_null", dict_pop_null, METH_VARARGS}, {"dict_popstring", dict_popstring, METH_VARARGS}, {"dict_popstring_null", dict_popstring_null, METH_VARARGS}, + {"dict_version", dict_version, METH_O}, {NULL}, }; diff --git a/Python/pystate.c b/Python/pystate.c index 7c75263b7e526a..2f1521edd8bf0a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1488,6 +1488,7 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->datastack_limit = NULL; tstate->what_event = -1; tstate->previous_executor = NULL; + tstate->dict_global_version = 0; tstate->delete_later = NULL; From b4bdf83cc67434235d9630c92c84a5261992b235 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 7 May 2024 01:32:48 +0100 Subject: [PATCH 15/15] GH-116380: Revert move of pathlib globbing code to `pathlib._glob` (#118678) The previous change made the `glob` module slower to import, because it imported `pathlib._glob` and hence the rest of `pathlib`. Reverts a40f557d7b7a355a55bb90c068e3e9202fd9c8f2. --- Lib/glob.py | 327 ++++++++++++++++++++++++++++++++++++++++- Lib/pathlib/_abc.py | 7 +- Lib/pathlib/_glob.py | 330 ------------------------------------------ Lib/pathlib/_local.py | 4 +- 4 files changed, 331 insertions(+), 337 deletions(-) delete mode 100644 Lib/pathlib/_glob.py diff --git a/Lib/glob.py b/Lib/glob.py index 9fe0e9f98ac5a7..6088de00a67a99 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -2,12 +2,14 @@ import contextlib import os +import re import fnmatch +import functools import itertools +import operator import stat import sys -from pathlib._glob import translate, magic_check, magic_check_bytes __all__ = ["glob", "iglob", "escape", "translate"] @@ -225,6 +227,9 @@ def _join(dirname, basename): return dirname or basename return os.path.join(dirname, basename) +magic_check = re.compile('([*?[])') +magic_check_bytes = re.compile(b'([*?[])') + def has_magic(s): if isinstance(s, bytes): match = magic_check_bytes.search(s) @@ -254,4 +259,324 @@ def escape(pathname): return drive + pathname +_special_parts = ('', '.', '..') _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) +_no_recurse_symlinks = object() + + +def translate(pat, *, recursive=False, include_hidden=False, seps=None): + """Translate a pathname with shell wildcards to a regular expression. + + If `recursive` is true, the pattern segment '**' will match any number of + path segments. + + If `include_hidden` is true, wildcards can match path segments beginning + with a dot ('.'). + + If a sequence of separator characters is given to `seps`, they will be + used to split the pattern into segments and match path separators. If not + given, os.path.sep and os.path.altsep (where available) are used. + """ + if not seps: + if os.path.altsep: + seps = (os.path.sep, os.path.altsep) + else: + seps = os.path.sep + escaped_seps = ''.join(map(re.escape, seps)) + any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps + not_sep = f'[^{escaped_seps}]' + if include_hidden: + one_last_segment = f'{not_sep}+' + one_segment = f'{one_last_segment}{any_sep}' + any_segments = f'(?:.+{any_sep})?' + any_last_segments = '.*' + else: + one_last_segment = f'[^{escaped_seps}.]{not_sep}*' + one_segment = f'{one_last_segment}{any_sep}' + any_segments = f'(?:{one_segment})*' + any_last_segments = f'{any_segments}(?:{one_last_segment})?' + + results = [] + parts = re.split(any_sep, pat) + last_part_idx = len(parts) - 1 + for idx, part in enumerate(parts): + if part == '*': + results.append(one_segment if idx < last_part_idx else one_last_segment) + elif recursive and part == '**': + if idx < last_part_idx: + if parts[idx + 1] != '**': + results.append(any_segments) + else: + results.append(any_last_segments) + else: + if part: + if not include_hidden and part[0] in '*?': + results.append(r'(?!\.)') + results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)) + if idx < last_part_idx: + results.append(any_sep) + res = ''.join(results) + return fr'(?s:{res})\Z' + + +@functools.lru_cache(maxsize=512) +def _compile_pattern(pat, sep, case_sensitive, recursive=True): + """Compile given glob pattern to a re.Pattern object (observing case + sensitivity).""" + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) + return re.compile(regex, flags=flags).match + + +class _Globber: + """Class providing shell-style pattern matching and globbing. + """ + + def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): + self.sep = sep + self.case_sensitive = case_sensitive + self.case_pedantic = case_pedantic + self.recursive = recursive + + # Low-level methods + + lstat = operator.methodcaller('lstat') + add_slash = operator.methodcaller('joinpath', '') + + @staticmethod + def scandir(path): + """Emulates os.scandir(), which returns an object that can be used as + a context manager. This method is called by walk() and glob(). + """ + return contextlib.nullcontext(path.iterdir()) + + @staticmethod + def concat_path(path, text): + """Appends text to the given path. + """ + return path.with_segments(path._raw_path + text) + + @staticmethod + def parse_entry(entry): + """Returns the path of an entry yielded from scandir(). + """ + return entry + + # High-level methods + + def compile(self, pat): + return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) + + def selector(self, parts): + """Returns a function that selects from a given path, walking and + filtering according to the glob-style pattern parts in *parts*. + """ + if not parts: + return self.select_exists + part = parts.pop() + if self.recursive and part == '**': + selector = self.recursive_selector + elif part in _special_parts: + selector = self.special_selector + elif not self.case_pedantic and magic_check.search(part) is None: + selector = self.literal_selector + else: + selector = self.wildcard_selector + return selector(part, parts) + + def special_selector(self, part, parts): + """Returns a function that selects special children of the given path. + """ + select_next = self.selector(parts) + + def select_special(path, exists=False): + path = self.concat_path(self.add_slash(path), part) + return select_next(path, exists) + return select_special + + def literal_selector(self, part, parts): + """Returns a function that selects a literal descendant of a path. + """ + + # Optimization: consume and join any subsequent literal parts here, + # rather than leaving them for the next selector. This reduces the + # number of string concatenation operations and calls to add_slash(). + while parts and magic_check.search(parts[-1]) is None: + part += self.sep + parts.pop() + + select_next = self.selector(parts) + + def select_literal(path, exists=False): + path = self.concat_path(self.add_slash(path), part) + return select_next(path, exists=False) + return select_literal + + def wildcard_selector(self, part, parts): + """Returns a function that selects direct children of a given path, + filtering by pattern. + """ + + match = None if part == '*' else self.compile(part) + dir_only = bool(parts) + if dir_only: + select_next = self.selector(parts) + + def select_wildcard(path, exists=False): + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with self.scandir(path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + if match is None or match(entry.name): + if dir_only: + try: + if not entry.is_dir(): + continue + except OSError: + continue + entry_path = self.parse_entry(entry) + if dir_only: + yield from select_next(entry_path, exists=True) + else: + yield entry_path + return select_wildcard + + def recursive_selector(self, part, parts): + """Returns a function that selects a given path and all its children, + recursively, filtering by pattern. + """ + # Optimization: consume following '**' parts, which have no effect. + while parts and parts[-1] == '**': + parts.pop() + + # Optimization: consume and join any following non-special parts here, + # rather than leaving them for the next selector. They're used to + # build a regular expression, which we use to filter the results of + # the recursive walk. As a result, non-special pattern segments + # following a '**' wildcard don't require additional filesystem access + # to expand. + follow_symlinks = self.recursive is not _no_recurse_symlinks + if follow_symlinks: + while parts and parts[-1] not in _special_parts: + part += self.sep + parts.pop() + + match = None if part == '**' else self.compile(part) + dir_only = bool(parts) + select_next = self.selector(parts) + + def select_recursive(path, exists=False): + path = self.add_slash(path) + match_pos = len(str(path)) + if match is None or match(str(path), match_pos): + yield from select_next(path, exists) + stack = [path] + while stack: + yield from select_recursive_step(stack, match_pos) + + def select_recursive_step(stack, match_pos): + path = stack.pop() + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with self.scandir(path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + is_dir = False + try: + if entry.is_dir(follow_symlinks=follow_symlinks): + is_dir = True + except OSError: + pass + + if is_dir or not dir_only: + entry_path = self.parse_entry(entry) + if match is None or match(str(entry_path), match_pos): + if dir_only: + yield from select_next(entry_path, exists=True) + else: + # Optimization: directly yield the path if this is + # last pattern part. + yield entry_path + if is_dir: + stack.append(entry_path) + + return select_recursive + + def select_exists(self, path, exists=False): + """Yields the given path, if it exists. + """ + if exists: + # Optimization: this path is already known to exist, e.g. because + # it was returned from os.scandir(), so we skip calling lstat(). + yield path + else: + try: + self.lstat(path) + yield path + except OSError: + pass + + @classmethod + def walk(cls, root, top_down, on_error, follow_symlinks): + """Walk the directory tree from the given root, similar to os.walk(). + """ + paths = [root] + while paths: + path = paths.pop() + if isinstance(path, tuple): + yield path + continue + try: + with cls.scandir(path) as scandir_it: + dirnames = [] + filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) + for entry in scandir_it: + name = entry.name + try: + if entry.is_dir(follow_symlinks=follow_symlinks): + if not top_down: + paths.append(cls.parse_entry(entry)) + dirnames.append(name) + else: + filenames.append(name) + except OSError: + filenames.append(name) + except OSError as error: + if on_error is not None: + on_error(error) + else: + if top_down: + yield path, dirnames, filenames + if dirnames: + prefix = cls.add_slash(path) + paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)] + + +class _StringGlobber(_Globber): + lstat = staticmethod(os.lstat) + scandir = staticmethod(os.scandir) + parse_entry = operator.attrgetter('path') + concat_path = operator.add + + if os.name == 'nt': + @staticmethod + def add_slash(pathname): + tail = os.path.splitroot(pathname)[2] + if not tail or tail[-1] in '\\/': + return pathname + return f'{pathname}\\' + else: + @staticmethod + def add_slash(pathname): + if not pathname or pathname[-1] == '/': + return pathname + return f'{pathname}/' diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 9ef016a060f1d7..06c10e8e4612ca 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,11 +12,10 @@ """ import functools +from glob import _Globber, _no_recurse_symlinks from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO -from ._glob import Globber, no_recurse_symlinks - __all__ = ["UnsupportedOperation"] @@ -119,7 +118,7 @@ class PurePathBase: '_resolving', ) parser = ParserBase() - _globber = Globber + _globber = _Globber def __init__(self, path, *paths): self._raw_path = self.parser.join(path, *paths) if paths else path @@ -671,7 +670,7 @@ def _glob_selector(self, parts, case_sensitive, recurse_symlinks): # know the case sensitivity of the underlying filesystem, so we # must use scandir() for everything, including non-wildcard parts. case_pedantic = True - recursive = True if recurse_symlinks else no_recurse_symlinks + recursive = True if recurse_symlinks else _no_recurse_symlinks globber = self._globber(self.parser.sep, case_sensitive, case_pedantic, recursive) return globber.selector(parts) diff --git a/Lib/pathlib/_glob.py b/Lib/pathlib/_glob.py deleted file mode 100644 index 49a3851e80a038..00000000000000 --- a/Lib/pathlib/_glob.py +++ /dev/null @@ -1,330 +0,0 @@ -import os -import re -import fnmatch -import functools -import operator - - -special_parts = ('', '.', '..') -magic_check = re.compile('([*?[])') -magic_check_bytes = re.compile(b'([*?[])') -no_recurse_symlinks = object() - - -def translate(pat, *, recursive=False, include_hidden=False, seps=None): - """Translate a pathname with shell wildcards to a regular expression. - - If `recursive` is true, the pattern segment '**' will match any number of - path segments. - - If `include_hidden` is true, wildcards can match path segments beginning - with a dot ('.'). - - If a sequence of separator characters is given to `seps`, they will be - used to split the pattern into segments and match path separators. If not - given, os.path.sep and os.path.altsep (where available) are used. - """ - if not seps: - if os.path.altsep: - seps = (os.path.sep, os.path.altsep) - else: - seps = os.path.sep - escaped_seps = ''.join(map(re.escape, seps)) - any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps - not_sep = f'[^{escaped_seps}]' - if include_hidden: - one_last_segment = f'{not_sep}+' - one_segment = f'{one_last_segment}{any_sep}' - any_segments = f'(?:.+{any_sep})?' - any_last_segments = '.*' - else: - one_last_segment = f'[^{escaped_seps}.]{not_sep}*' - one_segment = f'{one_last_segment}{any_sep}' - any_segments = f'(?:{one_segment})*' - any_last_segments = f'{any_segments}(?:{one_last_segment})?' - - results = [] - parts = re.split(any_sep, pat) - last_part_idx = len(parts) - 1 - for idx, part in enumerate(parts): - if part == '*': - results.append(one_segment if idx < last_part_idx else one_last_segment) - elif recursive and part == '**': - if idx < last_part_idx: - if parts[idx + 1] != '**': - results.append(any_segments) - else: - results.append(any_last_segments) - else: - if part: - if not include_hidden and part[0] in '*?': - results.append(r'(?!\.)') - results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)) - if idx < last_part_idx: - results.append(any_sep) - res = ''.join(results) - return fr'(?s:{res})\Z' - - -@functools.lru_cache(maxsize=512) -def compile_pattern(pat, sep, case_sensitive, recursive=True): - """Compile given glob pattern to a re.Pattern object (observing case - sensitivity).""" - flags = re.NOFLAG if case_sensitive else re.IGNORECASE - regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) - return re.compile(regex, flags=flags).match - - -class Globber: - """Class providing shell-style pattern matching and globbing. - """ - - def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): - self.sep = sep - self.case_sensitive = case_sensitive - self.case_pedantic = case_pedantic - self.recursive = recursive - - # Low-level methods - - lstat = operator.methodcaller('lstat') - add_slash = operator.methodcaller('joinpath', '') - - @staticmethod - def scandir(path): - """Emulates os.scandir(), which returns an object that can be used as - a context manager. This method is called by walk() and glob(). - """ - from contextlib import nullcontext - return nullcontext(path.iterdir()) - - @staticmethod - def concat_path(path, text): - """Appends text to the given path. - """ - return path.with_segments(path._raw_path + text) - - @staticmethod - def parse_entry(entry): - """Returns the path of an entry yielded from scandir(). - """ - return entry - - # High-level methods - - def compile(self, pat): - return compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) - - def selector(self, parts): - """Returns a function that selects from a given path, walking and - filtering according to the glob-style pattern parts in *parts*. - """ - if not parts: - return self.select_exists - part = parts.pop() - if self.recursive and part == '**': - selector = self.recursive_selector - elif part in special_parts: - selector = self.special_selector - elif not self.case_pedantic and magic_check.search(part) is None: - selector = self.literal_selector - else: - selector = self.wildcard_selector - return selector(part, parts) - - def special_selector(self, part, parts): - """Returns a function that selects special children of the given path. - """ - select_next = self.selector(parts) - - def select_special(path, exists=False): - path = self.concat_path(self.add_slash(path), part) - return select_next(path, exists) - return select_special - - def literal_selector(self, part, parts): - """Returns a function that selects a literal descendant of a path. - """ - - # Optimization: consume and join any subsequent literal parts here, - # rather than leaving them for the next selector. This reduces the - # number of string concatenation operations and calls to add_slash(). - while parts and magic_check.search(parts[-1]) is None: - part += self.sep + parts.pop() - - select_next = self.selector(parts) - - def select_literal(path, exists=False): - path = self.concat_path(self.add_slash(path), part) - return select_next(path, exists=False) - return select_literal - - def wildcard_selector(self, part, parts): - """Returns a function that selects direct children of a given path, - filtering by pattern. - """ - - match = None if part == '*' else self.compile(part) - dir_only = bool(parts) - if dir_only: - select_next = self.selector(parts) - - def select_wildcard(path, exists=False): - try: - # We must close the scandir() object before proceeding to - # avoid exhausting file descriptors when globbing deep trees. - with self.scandir(path) as scandir_it: - entries = list(scandir_it) - except OSError: - pass - else: - for entry in entries: - if match is None or match(entry.name): - if dir_only: - try: - if not entry.is_dir(): - continue - except OSError: - continue - entry_path = self.parse_entry(entry) - if dir_only: - yield from select_next(entry_path, exists=True) - else: - yield entry_path - return select_wildcard - - def recursive_selector(self, part, parts): - """Returns a function that selects a given path and all its children, - recursively, filtering by pattern. - """ - # Optimization: consume following '**' parts, which have no effect. - while parts and parts[-1] == '**': - parts.pop() - - # Optimization: consume and join any following non-special parts here, - # rather than leaving them for the next selector. They're used to - # build a regular expression, which we use to filter the results of - # the recursive walk. As a result, non-special pattern segments - # following a '**' wildcard don't require additional filesystem access - # to expand. - follow_symlinks = self.recursive is not no_recurse_symlinks - if follow_symlinks: - while parts and parts[-1] not in special_parts: - part += self.sep + parts.pop() - - match = None if part == '**' else self.compile(part) - dir_only = bool(parts) - select_next = self.selector(parts) - - def select_recursive(path, exists=False): - path = self.add_slash(path) - match_pos = len(str(path)) - if match is None or match(str(path), match_pos): - yield from select_next(path, exists) - stack = [path] - while stack: - yield from select_recursive_step(stack, match_pos) - - def select_recursive_step(stack, match_pos): - path = stack.pop() - try: - # We must close the scandir() object before proceeding to - # avoid exhausting file descriptors when globbing deep trees. - with self.scandir(path) as scandir_it: - entries = list(scandir_it) - except OSError: - pass - else: - for entry in entries: - is_dir = False - try: - if entry.is_dir(follow_symlinks=follow_symlinks): - is_dir = True - except OSError: - pass - - if is_dir or not dir_only: - entry_path = self.parse_entry(entry) - if match is None or match(str(entry_path), match_pos): - if dir_only: - yield from select_next(entry_path, exists=True) - else: - # Optimization: directly yield the path if this is - # last pattern part. - yield entry_path - if is_dir: - stack.append(entry_path) - - return select_recursive - - def select_exists(self, path, exists=False): - """Yields the given path, if it exists. - """ - if exists: - # Optimization: this path is already known to exist, e.g. because - # it was returned from os.scandir(), so we skip calling lstat(). - yield path - else: - try: - self.lstat(path) - yield path - except OSError: - pass - - @classmethod - def walk(cls, root, top_down, on_error, follow_symlinks): - """Walk the directory tree from the given root, similar to os.walk(). - """ - paths = [root] - while paths: - path = paths.pop() - if isinstance(path, tuple): - yield path - continue - try: - with cls.scandir(path) as scandir_it: - dirnames = [] - filenames = [] - if not top_down: - paths.append((path, dirnames, filenames)) - for entry in scandir_it: - name = entry.name - try: - if entry.is_dir(follow_symlinks=follow_symlinks): - if not top_down: - paths.append(cls.parse_entry(entry)) - dirnames.append(name) - else: - filenames.append(name) - except OSError: - filenames.append(name) - except OSError as error: - if on_error is not None: - on_error(error) - else: - if top_down: - yield path, dirnames, filenames - if dirnames: - prefix = cls.add_slash(path) - paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)] - - -class StringGlobber(Globber): - lstat = staticmethod(os.lstat) - scandir = staticmethod(os.scandir) - parse_entry = operator.attrgetter('path') - concat_path = operator.add - - if os.name == 'nt': - @staticmethod - def add_slash(pathname): - tail = os.path.splitroot(pathname)[2] - if not tail or tail[-1] in '\\/': - return pathname - return f'{pathname}\\' - else: - @staticmethod - def add_slash(pathname): - if not pathname or pathname[-1] == '/': - return pathname - return f'{pathname}/' diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index e708191213a8b4..b1e678aceb9ce8 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -5,6 +5,7 @@ import posixpath import sys import warnings +from glob import _StringGlobber from itertools import chain from _collections_abc import Sequence @@ -18,7 +19,6 @@ grp = None from ._abc import UnsupportedOperation, PurePathBase, PathBase -from ._glob import StringGlobber __all__ = [ @@ -102,7 +102,7 @@ class PurePath(PurePathBase): '_hash', ) parser = os.path - _globber = StringGlobber + _globber = _StringGlobber def __new__(cls, *args, **kwargs): """Construct a PurePath from one or several strings and or existing