diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 9bc8758e72bd8f..8d6c3ce29c30ec 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -68,6 +68,11 @@ struct _ts { PyThreadState *next; PyInterpreterState *interp; + /* The global instrumentation version in high bits, plus flags indicating + when to break out of the interpreter loop in lower bits. See details in + pycore_pystate.h. */ + uintptr_t eval_breaker; + struct { /* Has been initialized to a safe state. diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index a66af1389541dd..8f81df5e3a7ce9 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -42,7 +42,7 @@ PyAPI_FUNC(int) _PyEval_MakePendingCalls(PyThreadState *); extern void _Py_FinishPendingCalls(PyThreadState *tstate); extern void _PyEval_InitState(PyInterpreterState *); -extern void _PyEval_SignalReceived(PyInterpreterState *interp); +extern void _PyEval_SignalReceived(void); // bitwise flags: #define _Py_PENDING_MAINTHREADONLY 1 @@ -55,7 +55,6 @@ PyAPI_FUNC(int) _PyEval_AddPendingCall( void *arg, int flags); -extern void _PyEval_SignalAsyncExc(PyInterpreterState *interp); #ifdef HAVE_FORK extern PyStatus _PyEval_ReInitThreads(PyThreadState *tstate); #endif @@ -181,8 +180,9 @@ extern struct _PyInterpreterFrame* _PyEval_GetFrame(void); extern PyObject* _Py_MakeCoro(PyFunctionObject *func); /* Handle signals, pending calls, GIL drop request - and asynchronous exception */ -extern int _Py_HandlePending(PyThreadState *tstate); + and asynchronous exception. + Export for '_testinternalcapi' shared extension. */ +PyAPI_FUNC(int) _Py_HandlePending(PyThreadState *tstate); extern PyObject * _PyEval_GetFrameLocals(void); @@ -200,40 +200,6 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); -#define _PY_GIL_DROP_REQUEST_BIT 0 -#define _PY_SIGNALS_PENDING_BIT 1 -#define _PY_CALLS_TO_DO_BIT 2 -#define _PY_ASYNC_EXCEPTION_BIT 3 -#define _PY_GC_SCHEDULED_BIT 4 -#define _PY_EVAL_PLEASE_STOP_BIT 5 - -/* Reserve a few bits for future use */ -#define _PY_EVAL_EVENTS_BITS 8 -#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1) - -static inline void -_Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set) -{ - assert(set == 0 || set == 1); - uintptr_t to_set = set << bit; - uintptr_t mask = ((uintptr_t)1) << bit; - uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker); - if ((old & mask) == to_set) { - return; - } - uintptr_t new; - do { - new = (old & ~mask) | to_set; - } while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new)); -} - -static inline bool -_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit) -{ - return _Py_atomic_load_uintptr_relaxed(&interp->ceval.eval_breaker) & (((uintptr_t)1) << bit); -} - - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 28738980eb49be..d398ede5664cd3 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -78,11 +78,14 @@ struct _ceval_runtime_state { struct _ceval_state { - /* This single variable consolidates all requests to break out of - * the fast path in the eval loop. - * It is by far the hottest field in this struct and - * should be placed at the beginning. 
*/
-    uintptr_t eval_breaker;
+    /* This single variable holds the global instrumentation version and some
+     * interpreter-global requests to break out of the fast path in the eval
+     * loop. PyThreadState also contains an eval_breaker, which is the source
+     * of truth when a thread is running.
+     *
+     * It is by far the hottest field in this struct and should be placed at
+     * the beginning. */
+    uintptr_t interp_eval_breaker;
     /* Avoid false sharing */
     int64_t padding[7];

     int recursion_limit;
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index 8d0bc2a218e48d..38dcb1feabf7e0 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -287,7 +287,8 @@ extern void _PySlice_ClearCache(_PyFreeListState *state);
 extern void _PyDict_ClearFreeList(_PyFreeListState *state, int is_finalization);
 extern void _PyAsyncGen_ClearFreeLists(_PyFreeListState *state, int is_finalization);
 extern void _PyContext_ClearFreeList(_PyFreeListState *state, int is_finalization);
-extern void _Py_ScheduleGC(PyInterpreterState *interp);
+// Export for '_testinternalcapi' shared extension.
+PyAPI_FUNC(void) _Py_ScheduleGC(PyThreadState *tstate);
 extern void _Py_RunGC(PyThreadState *tstate);

 #ifdef __cplusplus
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index 289ef28f0dd9a9..41d1150fee2b3a 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -282,6 +282,42 @@ static inline _PyFreeListState* _PyFreeListState_GET(void)
 #endif
 }

+/* Bits that can be set in PyThreadState.eval_breaker */
+#define _PY_GIL_DROP_REQUEST_BIT (1U << 0)
+#define _PY_SIGNALS_PENDING_BIT (1U << 1)
+#define _PY_CALLS_TO_DO_BIT (1U << 2)
+#define _PY_ASYNC_EXCEPTION_BIT (1U << 3)
+#define _PY_GC_SCHEDULED_BIT (1U << 4)
+#define _PY_EVAL_PLEASE_STOP_BIT (1U << 5)
+
+/* Reserve a few bits for future use */
+#define _PY_EVAL_EVENTS_BITS 8
+#define _PY_EVAL_EVENTS_MASK ((1U << _PY_EVAL_EVENTS_BITS)-1)
+
+static inline void
+_PyThreadState_Signal(PyThreadState *tstate, uintptr_t bit)
+{
+    _Py_atomic_or_uintptr(&tstate->eval_breaker, bit);
+}
+
+static inline void
+_PyThreadState_Unsignal(PyThreadState *tstate, uintptr_t bit)
+{
+    _Py_atomic_and_uintptr(&tstate->eval_breaker, ~bit);
+}
+
+static inline int
+_PyThreadState_IsSignalled(PyThreadState *tstate, uintptr_t bit)
+{
+    uintptr_t b = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker);
+    return (b & bit) != 0;
+}
+
+// Free-threaded builds use these functions to set or unset a bit on all
+// threads in the given interpreter.
+void _PyInterpreterState_SignalAll(PyInterpreterState *interp, uintptr_t bit);
+void _PyInterpreterState_UnsignalAll(PyInterpreterState *interp, uintptr_t bit);
+
 #ifdef __cplusplus
 }
 #endif
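The three inline helpers above are thin wrappers around atomic read-modify-write operations on the per-thread word. A standalone sketch of the same set/test/clear pattern, using C11 <stdatomic.h> in place of CPython's internal _Py_atomic_* wrappers (names and constants here are illustrative):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define GC_SCHEDULED_BIT (1U << 4)   /* stand-in for _PY_GC_SCHEDULED_BIT */

    int main(void)
    {
        _Atomic uintptr_t eval_breaker = 0;

        /* signal: one atomic OR, idempotent and lock-free */
        atomic_fetch_or(&eval_breaker, GC_SCHEDULED_BIT);

        /* test: a relaxed load is enough, the eval loop re-checks anyway */
        int is_set = (atomic_load_explicit(&eval_breaker, memory_order_relaxed)
                      & GC_SCHEDULED_BIT) != 0;

        /* unsignal: one atomic AND with the complement */
        atomic_fetch_and(&eval_breaker, ~(uintptr_t)GC_SCHEDULED_BIT);

        printf("bit was set: %d\n", is_set);
        return 0;
    }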
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index 7c705d1224f915..0c9c59e85b2fcf 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -191,7 +191,10 @@ typedef struct pyruntimestate {
         int64_t next_id;
     } interpreters;

+    /* Platform-specific identifier and PyThreadState, respectively, for the
+       main thread in the main interpreter. */
     unsigned long main_thread;
+    PyThreadState *main_tstate;

     /* ---------- IMPORTANT ---------------------------
        The fields above this line are declared as early as
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index b01f344cb14a1a..5bd827d1163094 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -6,6 +6,10 @@
 from test.support.os_helper import temp_dir, TESTFN, unlink
 from test.support.script_helper import assert_python_ok, make_script
 from test.support import threading_helper
+try:
+    import _testinternalcapi
+except ImportError:
+    _testinternalcapi = None

 import gc
 import sys
@@ -1418,6 +1422,44 @@ def test_ast_fini(self):
         assert_python_ok("-c", code)


+class GCSchedulingTests(unittest.TestCase):
+    @unittest.skipIf(_testinternalcapi is None,
+                     "Requires functions from _testinternalcapi")
+    @threading_helper.requires_working_threading()
+    def test_gc_schedule_before_thread_switch(self):
+        # Ensure that a scheduled collection is not lost due to thread
+        # switching. Most of the work happens in helper functions in
+        # _testinternalcapi.
+
+        class Cycle:
+            def __init__(self):
+                self._self = self
+
+        thresholds = gc.get_threshold()
+        gc.enable()
+
+        try:
+            state = _testinternalcapi.schedule_gc_new_state()
+
+            def thread1():
+                _testinternalcapi.schedule_gc_do_schedule(state)
+
+            gc.set_threshold(1)
+            threads = [threading.Thread(target=thread1)]
+            with threading_helper.start_threads(threads):
+                r = weakref.ref(Cycle())
+                _testinternalcapi.schedule_gc_do_wait(state)
+
+            # Ensure that at least one GC has happened: running a few more
+            # bytecodes lets the eval breaker trigger the scheduled collection.
+            for i in range(5):
+                pass
+            self.assertIsNone(r())
+        finally:
+            gc.disable()
+            gc.set_threshold(*thresholds)
+
+
 def setUpModule():
     global enabled, debug
     enabled = gc.isenabled()
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 0bb739b5398b11..5f64bf5c04493e 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1650,6 +1650,117 @@ get_rare_event_counters(PyObject *self, PyObject *type)
     );
 }

+// The schedule_gc_* helpers below are used by
+// test_gc.py:GCSchedulingTests.test_gc_schedule_before_thread_switch() to
+// test the interaction between GC scheduling and the eval breaker.
+// +// The expected sequence of events is: +// - thread 2 waits for thread 1 to be ready +// - thread 1 waits for thread 2 to be ready +// (both threads are now at known locations in their respective C functions) +// - thread 1 clears out pending eval breaker flags +// - thread 2 checks that a GC is not scheduled +// - thread 1 schedules a GC and releases the GIL without checking its eval breaker +// - thread 2 checks that a GC is scheduled and returns +// - thread 1 sees that thread 2 is done and returns, allowing Python code to run again +typedef enum { + SCHEDULE_GC_INIT, + SCHEDULE_GC_THREAD1_READY, + SCHEDULE_GC_THREAD2_READY, + SCHEDULE_GC_THREAD1_CLEARED, + SCHEDULE_GC_THREAD2_VERIFIED, + SCHEDULE_GC_THREAD1_SCHEDULED, + SCHEDULE_GC_THREAD2_DONE, + + SCHEDULE_GC_STOP, +} schedule_gc_state; + +static void +schedule_gc_state_destructor(PyObject *capsule) +{ + void *state = PyCapsule_GetPointer(capsule, NULL); + assert(state != NULL); + free(state); +} + +static PyObject * +schedule_gc_new_state(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + schedule_gc_state *state = malloc(sizeof(schedule_gc_state)); + if (state == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Failed to allocate state"); + return NULL; + } + *state = SCHEDULE_GC_INIT; + return PyCapsule_New(state, NULL, schedule_gc_state_destructor); +} + +// Repeatedly release the GIL until the desired state appears in *state. +#define SCHEDULE_GC_WAIT_FOR(desired) \ + do { \ + while (*state != desired) { \ + if (*state == SCHEDULE_GC_STOP) { \ + Py_RETURN_NONE; \ + } \ + PyEval_RestoreThread(PyEval_SaveThread()); \ + } \ + } while (0) + +static PyObject * +schedule_gc_do_schedule(PyObject *self, PyObject *capsule) +{ + PyThreadState *tstate = PyThreadState_Get(); + schedule_gc_state *state = PyCapsule_GetPointer(capsule, NULL); + assert(state != NULL); + + *state = SCHEDULE_GC_THREAD1_READY; + SCHEDULE_GC_WAIT_FOR(SCHEDULE_GC_THREAD2_READY); + + if (_Py_HandlePending(tstate) < 0) { + *state = SCHEDULE_GC_STOP; + return NULL; + } + *state = SCHEDULE_GC_THREAD1_CLEARED; + SCHEDULE_GC_WAIT_FOR(SCHEDULE_GC_THREAD2_VERIFIED); + + _Py_ScheduleGC(tstate); + *state = SCHEDULE_GC_THREAD1_SCHEDULED; + SCHEDULE_GC_WAIT_FOR(SCHEDULE_GC_THREAD2_DONE); + + Py_RETURN_NONE; +} + +static PyObject * +schedule_gc_do_wait(PyObject *self, PyObject *capsule) +{ + PyThreadState *tstate = PyThreadState_Get(); + schedule_gc_state *state = PyCapsule_GetPointer(capsule, NULL); + assert(state != NULL); + + SCHEDULE_GC_WAIT_FOR(SCHEDULE_GC_THREAD1_READY); + + *state = SCHEDULE_GC_THREAD2_READY; + SCHEDULE_GC_WAIT_FOR(SCHEDULE_GC_THREAD1_CLEARED); + + if (_PyThreadState_IsSignalled(tstate, _PY_GC_SCHEDULED_BIT)) { + PyErr_SetString(PyExc_AssertionError, + "GC_SCHEDULED_BIT unexpectedly set"); + return NULL; + } + *state = SCHEDULE_GC_THREAD2_VERIFIED; + SCHEDULE_GC_WAIT_FOR(SCHEDULE_GC_THREAD1_SCHEDULED); + + if (!_PyThreadState_IsSignalled(tstate, _PY_GC_SCHEDULED_BIT)) { + PyErr_SetString(PyExc_AssertionError, + "GC_SCHEDULED_BIT not carried over from thread 1"); + return NULL; + } + *state = SCHEDULE_GC_THREAD2_DONE; + // Let the GC run naturally once we've returned to Python. 
+ + Py_RETURN_NONE; +} + #ifdef Py_GIL_DISABLED static PyObject * @@ -1727,6 +1838,9 @@ static PyMethodDef module_functions[] = { _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF {"get_type_module_name", get_type_module_name, METH_O}, {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS}, + {"schedule_gc_new_state", schedule_gc_new_state, METH_NOARGS}, + {"schedule_gc_do_schedule", schedule_gc_do_schedule, METH_O}, + {"schedule_gc_do_wait", schedule_gc_do_wait, METH_O}, #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c index 394a997b20c06d..0969284d3e2af0 100644 --- a/Modules/signalmodule.c +++ b/Modules/signalmodule.c @@ -276,11 +276,7 @@ trip_signal(int sig_num) cleared in PyErr_CheckSignals() before .tripped. */ _Py_atomic_store_int(&is_tripped, 1); - /* Signals are always handled by the main interpreter */ - PyInterpreterState *interp = _PyInterpreterState_Main(); - - /* Notify ceval.c */ - _PyEval_SignalReceived(interp); + _PyEval_SignalReceived(); /* And then write to the wakeup fd *after* setting all the globals and doing the _PyEval_SignalReceived. We used to write to the wakeup fd @@ -303,6 +299,7 @@ trip_signal(int sig_num) int fd = wakeup.fd; if (fd != INVALID_FD) { + PyInterpreterState *interp = _PyInterpreterState_Main(); unsigned char byte = (unsigned char)sig_num; #ifdef MS_WINDOWS if (wakeup.use_send) { @@ -1770,8 +1767,7 @@ PyErr_CheckSignals(void) Python code to ensure signals are handled. Checking for the GC here allows long running native code to clean cycles created using the C-API even if it doesn't run the evaluation loop */ - if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) { - _Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0); + if (_PyThreadState_IsSignalled(tstate, _PY_GC_SCHEDULED_BIT)) { _Py_RunGC(tstate); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6fb4d719e43991..6ba8667fa50835 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -8,7 +8,6 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_ceval.h" // _PyEval_SignalAsyncExc() #include "pycore_code.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS #include "pycore_function.h" @@ -144,7 +143,7 @@ dummy_func( TIER_ONE_ONLY assert(frame == tstate->current_frame); uintptr_t global_version = - _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & + _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((code_version & 255) == 0); @@ -166,14 +165,14 @@ dummy_func( DEOPT_IF(_Py_emscripten_signal_clock == 0); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version); } inst(INSTRUMENTED_RESUME, (--)) { - uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != 
global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { diff --git a/Python/ceval.c b/Python/ceval.c index 4f208009086191..28a9499694c3de 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -5,7 +5,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_call.h" // _PyObject_CallNoArgs() -#include "pycore_ceval.h" // _PyEval_SignalAsyncExc() +#include "pycore_ceval.h" #include "pycore_code.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS #include "pycore_function.h" diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index ad90359318761a..b5de17abd1cf2f 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -56,60 +56,78 @@ #define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) #endif -/* bpo-40010: eval_breaker should be recomputed if there - is a pending signal: signal received by another thread which cannot - handle signals. - Similarly, we set CALLS_TO_DO and ASYNC_EXCEPTION to match the thread. -*/ +// Atomically copy the bits indicated by mask between two eval breakers. static inline void -update_eval_breaker_from_thread(PyInterpreterState *interp, PyThreadState *tstate) +copy_eval_breaker_bits(uintptr_t *from, uintptr_t *to, uintptr_t mask) { - if (tstate == NULL) { + uintptr_t from_bits = _Py_atomic_load_uintptr_relaxed(from) & mask; + uintptr_t old_value = _Py_atomic_load_uintptr_relaxed(to); + uintptr_t to_bits = old_value & mask; + if (from_bits == to_bits) { return; } - if (_Py_IsMainThread()) { - int32_t calls_to_do = _Py_atomic_load_int32_relaxed( - &_PyRuntime.ceval.pending_mainthread.calls_to_do); - if (calls_to_do) { - _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); - } - if (_Py_ThreadCanHandleSignals(interp)) { - if (_Py_atomic_load_int(&_PyRuntime.signals.is_tripped)) { - _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); - } - } - } - if (tstate->async_exc != NULL) { - _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); - } + uintptr_t new_value; + do { + new_value = (old_value & ~mask) | from_bits; + } while (!_Py_atomic_compare_exchange_uintptr(to, &old_value, new_value)); } +// When attaching a thread, set the global instrumentation version, +// _PY_CALLS_TO_DO_BIT, and _PY_GC_SCHEDULED_BIT to match the current state of +// the interpreter. static inline void -SET_GIL_DROP_REQUEST(PyInterpreterState *interp) +update_thread_eval_breaker(PyInterpreterState *interp, PyThreadState *tstate) { - _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 1); -} +#ifdef Py_GIL_DISABLED + // Free-threaded builds eagerly update the eval_breaker on *all* threads as + // needed, so this function doesn't apply. + return; +#endif + if (tstate == NULL) { + return; + } -static inline void -RESET_GIL_DROP_REQUEST(PyInterpreterState *interp) -{ - _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 0); -} + int32_t calls_to_do = _Py_atomic_load_int32_relaxed( + &interp->ceval.pending.calls_to_do); + if (calls_to_do) { + _PyThreadState_Signal(tstate, _PY_CALLS_TO_DO_BIT); + } + else if (_Py_IsMainThread()) { + calls_to_do = _Py_atomic_load_int32_relaxed( + &_PyRuntime.ceval.pending_mainthread.calls_to_do); + if (calls_to_do) { + _PyThreadState_Signal(tstate, _PY_CALLS_TO_DO_BIT); + } + } + // _PY_CALLS_TO_DO_BIT was derived from other state above, so the only bits + // we copy from our interpreter's eval_breaker are the instrumentation + // version number and GC bit. 
+    const uintptr_t mask = ~_PY_EVAL_EVENTS_MASK | _PY_GC_SCHEDULED_BIT;
+    copy_eval_breaker_bits(&interp->ceval.interp_eval_breaker,
+                           &tstate->eval_breaker,
+                           mask);
+}

+// When detaching a thread, transfer _PY_GC_SCHEDULED_BIT to its interpreter,
+// in case a GC was scheduled but not processed yet.
 static inline void
-SIGNAL_PENDING_CALLS(PyInterpreterState *interp)
-{
-    _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1);
-}
+update_interp_eval_breaker(PyThreadState *tstate, PyInterpreterState *interp) {
+#ifdef Py_GIL_DISABLED
+    // Free-threaded builds eagerly update the eval_breaker on *all* threads as
+    // needed, so this function doesn't apply.
+    return;
+#endif
+    if (tstate == NULL) {
+        return;
+    }

-static inline void
-UNSIGNAL_PENDING_CALLS(PyInterpreterState *interp)
-{
-    _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 0);
+    copy_eval_breaker_bits(&tstate->eval_breaker,
+                           &interp->ceval.interp_eval_breaker,
+                           _PY_GC_SCHEDULED_BIT);
 }

 /*
@@ -240,6 +258,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
     }

     MUTEX_LOCK(gil->mutex);
+    update_interp_eval_breaker(tstate, interp);
     _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
     _Py_atomic_store_int_relaxed(&gil->locked, 0);
     COND_SIGNAL(gil->cond);
@@ -254,13 +273,14 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
        the GIL, and that's the only time we might delete the
        interpreter, so checking tstate first prevents the crash.
        See https://github.com/python/cpython/issues/104341. */
-    if (tstate != NULL && _Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) {
+    if (tstate != NULL &&
+        _PyThreadState_IsSignalled(tstate, _PY_GIL_DROP_REQUEST_BIT)) {
         MUTEX_LOCK(gil->switch_mutex);
         /* Not switched yet => wait */
         if (((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) == tstate)
         {
             assert(_PyThreadState_CheckConsistency(tstate));
-            RESET_GIL_DROP_REQUEST(tstate->interp);
+            _PyThreadState_Unsignal(tstate, _PY_GIL_DROP_REQUEST_BIT);
             /* NOTE: if COND_WAIT does not atomically start waiting when
                releasing the mutex, another thread can run through, take
                the GIL and drop it again, and reset the condition
@@ -321,6 +341,8 @@ take_gil(PyThreadState *tstate)
                _Py_atomic_load_int_relaxed(&gil->locked) &&
                gil->switch_number == saved_switchnum)
         {
+            PyThreadState *holder_tstate =
+                (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder);
             if (_PyThreadState_MustExit(tstate)) {
                 MUTEX_UNLOCK(gil->mutex);
                 // gh-96387: If the loop requested a drop request in a previous
@@ -330,13 +352,13 @@ take_gil(PyThreadState *tstate)
                 // may have to request again a drop request (iterate one more
                 // time).
                 if (drop_requested) {
-                    RESET_GIL_DROP_REQUEST(interp);
+                    _PyThreadState_Unsignal(holder_tstate, _PY_GIL_DROP_REQUEST_BIT);
                 }
                 PyThread_exit_thread();
             }
             assert(_PyThreadState_CheckConsistency(tstate));

-            SET_GIL_DROP_REQUEST(interp);
+            _PyThreadState_Signal(holder_tstate, _PY_GIL_DROP_REQUEST_BIT);
             drop_requested = 1;
         }
     }
@@ -369,13 +391,15 @@ take_gil(PyThreadState *tstate)
            in take_gil() while the main thread called
            wait_for_thread_shutdown() from Py_Finalize(). */
         MUTEX_UNLOCK(gil->mutex);
-        drop_gil(interp, tstate);
+        /* Passing NULL to drop_gil() indicates that this thread is about to
+           terminate and will never hold the GIL again. */
+        drop_gil(interp, NULL);
         PyThread_exit_thread();
     }
     assert(_PyThreadState_CheckConsistency(tstate));

-    RESET_GIL_DROP_REQUEST(interp);
-    update_eval_breaker_from_thread(interp, tstate);
+    _PyThreadState_Unsignal(tstate, _PY_GIL_DROP_REQUEST_BIT);
+    update_thread_eval_breaker(interp, tstate);

     MUTEX_UNLOCK(gil->mutex);

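copy_eval_breaker_bits() above has to move only the masked bits while the other bits (the instrumentation version, unrelated flags) may be changing concurrently, hence the compare-and-swap loop. A self-contained version of that loop in C11 atomics (the real code uses CPython's _Py_atomic_* wrappers):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Copy the bits selected by mask from *from into *to, preserving all
     * other bits of *to even if they change concurrently. */
    void
    copy_bits(_Atomic uintptr_t *from, _Atomic uintptr_t *to, uintptr_t mask)
    {
        uintptr_t from_bits = atomic_load_explicit(from, memory_order_relaxed) & mask;
        uintptr_t old_value = atomic_load_explicit(to, memory_order_relaxed);
        if ((old_value & mask) == from_bits) {
            return;  /* already in sync: skip the read-modify-write */
        }
        uintptr_t new_value;
        do {
            new_value = (old_value & ~mask) | from_bits;
            /* on failure, old_value is reloaded with the current *to */
        } while (!atomic_compare_exchange_weak(to, &old_value, new_value));
    }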
@@ -590,15 +614,6 @@ _PyEval_ReInitThreads(PyThreadState *tstate)
 }
 #endif

-/* This function is used to signal that async exceptions are waiting to be
-   raised. */
-
-void
-_PyEval_SignalAsyncExc(PyInterpreterState *interp)
-{
-    _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1);
-}
-
 PyThreadState *
 PyEval_SaveThread(void)
 {
@@ -646,11 +661,9 @@ PyEval_RestoreThread(PyThreadState *tstate)
  */

 void
-_PyEval_SignalReceived(PyInterpreterState *interp)
+_PyEval_SignalReceived(void)
 {
-    if (_Py_ThreadCanHandleSignals(interp)) {
-        _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1);
-    }
+    _PyThreadState_Signal(_PyRuntime.main_tstate, _PY_SIGNALS_PENDING_BIT);
 }

 /* Push one item onto the queue while holding the lock. */
@@ -702,6 +715,24 @@ _pop_pending_call(struct _pending_calls *pending,
     }
 }

+static void
+signal_active_thread(PyInterpreterState *interp, uintptr_t bit)
+{
+    struct _gil_runtime_state *gil = interp->ceval.gil;
+
+    // If a thread from the targeted interpreter is holding the GIL, signal
+    // that thread. Otherwise, the next thread to run from the targeted
+    // interpreter will have its bit set as part of taking the GIL.
+    MUTEX_LOCK(gil->mutex);
+    if (_Py_atomic_load_int_relaxed(&gil->locked)) {
+        PyThreadState *holder = (PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder);
+        if (holder->interp == interp) {
+            _PyThreadState_Signal(holder, bit);
+        }
+    }
+    MUTEX_UNLOCK(gil->mutex);
+}
+
 /* This implementation is thread-safe.  It allows
    scheduling to be made from any thread, and even from an executing
    callback.
@@ -711,10 +742,9 @@ int
 _PyEval_AddPendingCall(PyInterpreterState *interp,
                        _Py_pending_call_func func, void *arg, int flags)
 {
-    assert(!(flags & _Py_PENDING_MAINTHREADONLY)
-           || _Py_IsMainInterpreter(interp));
     struct _pending_calls *pending = &interp->ceval.pending;
-    if (flags & _Py_PENDING_MAINTHREADONLY) {
+    int main_only = (flags & _Py_PENDING_MAINTHREADONLY) != 0;
+    if (main_only) {
         /* The main thread only exists in the main interpreter. */
         assert(_Py_IsMainInterpreter(interp));
         pending = &_PyRuntime.ceval.pending_mainthread;
@@ -724,8 +754,16 @@ _PyEval_AddPendingCall(PyInterpreterState *interp,
     int result = _push_pending_call(pending, func, arg, flags);
     PyMutex_Unlock(&pending->mutex);

-    /* signal main loop */
-    SIGNAL_PENDING_CALLS(interp);
+    if (main_only) {
+        _PyThreadState_Signal(_PyRuntime.main_tstate, _PY_CALLS_TO_DO_BIT);
+    }
+    else {
+#ifdef Py_GIL_DISABLED
+        _PyInterpreterState_SignalAll(interp, _PY_CALLS_TO_DO_BIT);
+#else
+        signal_active_thread(interp, _PY_CALLS_TO_DO_BIT);
+#endif
+    }
+
     return result;
 }

@@ -742,13 +780,13 @@ static int
 handle_signals(PyThreadState *tstate)
 {
     assert(_PyThreadState_CheckConsistency(tstate));
-    _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 0);
+    _PyThreadState_Unsignal(tstate, _PY_SIGNALS_PENDING_BIT);
     if (!_Py_ThreadCanHandleSignals(tstate->interp)) {
         return 0;
     }
     if (_PyErr_CheckSignalsTstate(tstate) < 0) {
         /* On failure, re-schedule a call to handle_signals().
*/ - _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 1); + _PyThreadState_Signal(tstate, _PY_SIGNALS_PENDING_BIT); return -1; } return 0; @@ -783,9 +821,30 @@ _make_pending_calls(struct _pending_calls *pending) return 0; } +static void +signal_pending_calls(PyThreadState *tstate, PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + _PyInterpreterState_SignalAll(interp, _PY_CALLS_TO_DO_BIT); +#else + _PyThreadState_Signal(tstate, _PY_CALLS_TO_DO_BIT); +#endif +} + +static void +unsignal_pending_calls(PyThreadState *tstate, PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + _PyInterpreterState_UnsignalAll(interp, _PY_CALLS_TO_DO_BIT); +#else + _PyThreadState_Unsignal(tstate, _PY_CALLS_TO_DO_BIT); +#endif +} + static int -make_pending_calls(PyInterpreterState *interp) +make_pending_calls(PyThreadState *tstate) { + PyInterpreterState *interp = tstate->interp; struct _pending_calls *pending = &interp->ceval.pending; struct _pending_calls *pending_main = &_PyRuntime.ceval.pending_mainthread; @@ -811,12 +870,12 @@ make_pending_calls(PyInterpreterState *interp) /* unsignal before starting to call callbacks, so that any callback added in-between re-signals */ - UNSIGNAL_PENDING_CALLS(interp); + unsignal_pending_calls(tstate, interp); if (_make_pending_calls(pending) != 0) { pending->busy = 0; /* There might not be more calls to make, but we play it safe. */ - SIGNAL_PENDING_CALLS(interp); + signal_pending_calls(tstate, interp); return -1; } @@ -824,7 +883,7 @@ make_pending_calls(PyInterpreterState *interp) if (_make_pending_calls(pending_main) != 0) { pending->busy = 0; /* There might not be more calls to make, but we play it safe. */ - SIGNAL_PENDING_CALLS(interp); + signal_pending_calls(tstate, interp); return -1; } } @@ -839,7 +898,7 @@ _Py_FinishPendingCalls(PyThreadState *tstate) assert(PyGILState_Check()); assert(_PyThreadState_CheckConsistency(tstate)); - if (make_pending_calls(tstate->interp) < 0) { + if (make_pending_calls(tstate) < 0) { PyObject *exc = _PyErr_GetRaisedException(tstate); PyErr_BadInternalCall(); _PyErr_ChainExceptions1(exc); @@ -862,7 +921,7 @@ _PyEval_MakePendingCalls(PyThreadState *tstate) } } - res = make_pending_calls(tstate->interp); + res = make_pending_calls(tstate); if (res != 0) { return res; } @@ -956,10 +1015,11 @@ int _Py_HandlePending(PyThreadState *tstate) { PyInterpreterState *interp = tstate->interp; + uintptr_t breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); /* Stop-the-world */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_EVAL_PLEASE_STOP_BIT)) { - _Py_set_eval_breaker_bit(interp, _PY_EVAL_PLEASE_STOP_BIT, 0); + if ((breaker & _PY_EVAL_PLEASE_STOP_BIT) != 0) { + _PyThreadState_Unsignal(tstate, _PY_EVAL_PLEASE_STOP_BIT); _PyThreadState_Suspend(tstate); /* The attach blocks until the stop-the-world event is complete. 
*/ @@ -967,27 +1027,26 @@ _Py_HandlePending(PyThreadState *tstate) } /* Pending signals */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) { + if ((breaker & _PY_SIGNALS_PENDING_BIT) != 0) { if (handle_signals(tstate) != 0) { return -1; } } /* Pending calls */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_CALLS_TO_DO_BIT)) { - if (make_pending_calls(interp) != 0) { + if ((breaker & _PY_CALLS_TO_DO_BIT) != 0) { + if (make_pending_calls(tstate) != 0) { return -1; } } /* GC scheduled to run */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_GC_SCHEDULED_BIT)) { - _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 0); + if ((breaker & _PY_GC_SCHEDULED_BIT) != 0) { _Py_RunGC(tstate); } /* GIL drop request */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) { + if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) { /* Give another thread a chance */ _PyThreadState_Detach(tstate); @@ -997,11 +1056,10 @@ _Py_HandlePending(PyThreadState *tstate) } /* Check for asynchronous exception. */ - if (_Py_eval_breaker_bit_is_set(interp, _PY_ASYNC_EXCEPTION_BIT)) { - _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0); - if (tstate->async_exc != NULL) { - PyObject *exc = tstate->async_exc; - tstate->async_exc = NULL; + if ((breaker & _PY_ASYNC_EXCEPTION_BIT) != 0) { + _PyThreadState_Unsignal(tstate, _PY_ASYNC_EXCEPTION_BIT); + PyObject *exc = _Py_atomic_exchange_ptr(&tstate->async_exc, NULL); + if (exc != NULL) { _PyErr_SetNone(tstate, exc); Py_DECREF(exc); return -1; @@ -1009,4 +1067,3 @@ _Py_HandlePending(PyThreadState *tstate) } return 0; } - diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index c2550f53ad6eaa..d5a979f801e7ba 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -117,7 +117,7 @@ #define CHECK_EVAL_BREAKER() \ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ - if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \ + if (_Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & _PY_EVAL_EVENTS_MASK) { \ if (_Py_HandlePending(tstate) != 0) { \ GOTO_ERROR(error); \ } \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2d914b82dbf88f..14a50ab370c8dd 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -17,7 +17,7 @@ if (_Py_emscripten_signal_clock == 0) goto deoptimize; _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((version & _PY_EVAL_EVENTS_MASK) == 0); if (eval_breaker != version) goto deoptimize; diff --git a/Python/gc.c b/Python/gc.c index 46646760291526..586b32d3194225 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -3,7 +3,6 @@ // See https://devguide.python.org/internals/garbage-collector/ #include "Python.h" -#include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" #include "pycore_dict.h" // _PyDict_MaybeUntrack() #include "pycore_initconfig.h" @@ -1765,9 +1764,12 @@ PyObject_IS_GC(PyObject *obj) } void -_Py_ScheduleGC(PyInterpreterState *interp) +_Py_ScheduleGC(PyThreadState *tstate) { - _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); + if (!_PyThreadState_IsSignalled(tstate, _PY_GC_SCHEDULED_BIT)) + { + _PyThreadState_Signal(tstate, _PY_GC_SCHEDULED_BIT); + } } void @@ -1787,13 +1789,14 @@ _PyObject_GC_Link(PyObject 
*op) !_Py_atomic_load_int_relaxed(&gcstate->collecting) && !_PyErr_Occurred(tstate)) { - _Py_ScheduleGC(tstate->interp); + _Py_ScheduleGC(tstate); } } void _Py_RunGC(PyThreadState *tstate) { + _PyThreadState_Unsignal(tstate, _PY_GC_SCHEDULED_BIT); gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 8fbcdb15109b76..54a926c0830a0d 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1480,9 +1480,9 @@ PyObject_IS_GC(PyObject *obj) } void -_Py_ScheduleGC(PyInterpreterState *interp) +_Py_ScheduleGC(PyThreadState *tstate) { - _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); + _PyInterpreterState_SignalAll(tstate->interp, _PY_GC_SCHEDULED_BIT); } void @@ -1495,13 +1495,14 @@ _PyObject_GC_Link(PyObject *op) if (gc_should_collect(gcstate) && !_Py_atomic_load_int_relaxed(&gcstate->collecting)) { - _Py_ScheduleGC(tstate->interp); + _Py_ScheduleGC(tstate); } } void _Py_RunGC(PyThreadState *tstate) { + _PyInterpreterState_UnsignalAll(tstate->interp, _PY_GC_SCHEDULED_BIT); gc_collect_main(tstate, 0, _Py_GC_REASON_HEAP); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 16f1db30620d72..64054b5d1890ac 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3128,7 +3128,7 @@ _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_RESUME); - uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { @@ -4795,7 +4795,7 @@ TIER_ONE_ONLY assert(frame == tstate->current_frame); uintptr_t global_version = - _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & + _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((code_version & 255) == 0); @@ -4822,7 +4822,7 @@ DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version, RESUME); diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 533aece210202b..79477701536b22 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -891,18 +891,39 @@ static inline int most_significant_bit(uint8_t bits) { static uint32_t global_version(PyInterpreterState *interp) { - return interp->ceval.eval_breaker & ~_PY_EVAL_EVENTS_MASK; + return interp->ceval.interp_eval_breaker & ~_PY_EVAL_EVENTS_MASK; } static void -set_global_version(PyInterpreterState *interp, uint32_t version) +set_version_raw(uintptr_t *breaker, uint32_t version) { - assert((version & _PY_EVAL_EVENTS_MASK) == 0); - uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker); + uintptr_t old = _Py_atomic_load_uintptr(breaker); intptr_t new; do { new = (old & _PY_EVAL_EVENTS_MASK) | version; - } while 
(!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new));
+    } while (!_Py_atomic_compare_exchange_uintptr(breaker, &old, new));
+}
+
+static void
+set_global_version(PyThreadState *tstate, uint32_t version)
+{
+    assert((version & _PY_EVAL_EVENTS_MASK) == 0);
+    PyInterpreterState *interp = tstate->interp;
+    set_version_raw(&interp->ceval.interp_eval_breaker, version);
+
+#ifdef Py_GIL_DISABLED
+    // Set the version on all threads in free-threaded builds.
+    _PyRuntimeState *runtime = &_PyRuntime;
+    HEAD_LOCK(runtime);
+    for (tstate = interp->threads.head; tstate != NULL; tstate = PyThreadState_Next(tstate)) {
+        set_version_raw(&tstate->eval_breaker, version);
+    }
+    HEAD_UNLOCK(runtime);
+#else
+    // Normal builds take the current version from interp_eval_breaker when
+    // attaching a thread, so we only have to set the current thread's version.
+    set_version_raw(&tstate->eval_breaker, version);
+#endif
 }

 static bool
@@ -1566,7 +1587,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp)
 {
     if (is_version_up_to_date(code, interp)) {
         assert(
-            (interp->ceval.eval_breaker & ~_PY_EVAL_EVENTS_MASK) == 0 ||
+            (interp->ceval.interp_eval_breaker & ~_PY_EVAL_EVENTS_MASK) == 0 ||
             instrumentation_cross_checks(interp, code)
         );
         return 0;
@@ -1778,7 +1799,8 @@ int
 _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
 {
     assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS);
-    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyThreadState *tstate = _PyThreadState_GET();
+    PyInterpreterState *interp = tstate->interp;
     assert(events < (1 << _PY_MONITORING_UNGROUPED_EVENTS));
     if (check_tool(interp, tool_id)) {
         return -1;
@@ -1793,7 +1815,7 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events)
         PyErr_Format(PyExc_OverflowError, "events set too many times");
         return -1;
     }
-    set_global_version(interp, new_version);
+    set_global_version(tstate, new_version);
     _Py_Executors_InvalidateAll(interp);
     return instrument_all_executing_code_objects(interp);
 }
@@ -2122,7 +2144,8 @@ monitoring_restart_events_impl(PyObject *module)
      * last restart version > instrumented version for all code objects
      * last restart version < current version
      */
-    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyThreadState *tstate = _PyThreadState_GET();
+    PyInterpreterState *interp = tstate->interp;
     uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT;
     uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT;
     if (new_version <= MONITORING_VERSION_INCREMENT) {
@@ -2130,7 +2153,7 @@ monitoring_restart_events_impl(PyObject *module)
         return NULL;
     }
     interp->last_restart_version = restart_version;
-    set_global_version(interp, new_version);
+    set_global_version(tstate, new_version);
     if (instrument_all_executing_code_objects(interp)) {
         return NULL;
     }
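The version/flags packing that set_version_raw() preserves is what makes the RESUME fast path a single comparison: instrumentation versions are multiples of 256, so they occupy the high bits, and the low _PY_EVAL_EVENTS_BITS are left for event flags. A small self-contained check of that arithmetic (the constants mirror the ones in pycore_pystate.h):

    #include <assert.h>
    #include <stdint.h>

    #define EVENTS_BITS 8                          /* _PY_EVAL_EVENTS_BITS */
    #define EVENTS_MASK ((1U << EVENTS_BITS) - 1)  /* _PY_EVAL_EVENTS_MASK */
    #define GC_BIT (1U << 4)                       /* _PY_GC_SCHEDULED_BIT */

    int main(void)
    {
        uintptr_t version = 2 * 256;   /* versions advance in steps of 256 */
        uintptr_t breaker = version;   /* no events pending */

        /* RESUME's fast path: the whole word equals the code version. */
        assert(breaker == version);

        /* Any pending flag (or a version bump) breaks the equality and
         * forces the slow path, yet both halves remain recoverable. */
        breaker |= GC_BIT;
        assert(breaker != version);
        assert((breaker & ~(uintptr_t)EVENTS_MASK) == version);
        assert((breaker & EVENTS_MASK) == GC_BIT);
        return 0;
    }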
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 0cac7109340129..e2c8b66a340eb6 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -663,6 +663,7 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
     if (tstate == NULL) {
         return _PyStatus_ERR("can't make first thread");
     }
+    runtime->main_tstate = tstate;

     _PyThreadState_Bind(tstate);
     init_interp_create_gil(tstate, config.gil);
diff --git a/Python/pystate.c b/Python/pystate.c
index e77e5bfa7e2df8..85d57f3bc65734 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -793,7 +793,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)

     // At this time, all the threads should be cleared so we don't need
     // atomic operations for eval_breaker
-    interp->ceval.eval_breaker = 0;
+    interp->ceval.interp_eval_breaker = 0;

     for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) {
         interp->monitors.tools[i] = 0;
@@ -1306,6 +1306,7 @@ init_threadstate(_PyThreadStateImpl *_tstate,
     assert(interp != NULL);
     tstate->interp = interp;
+    tstate->eval_breaker = _Py_atomic_load_uintptr_relaxed(&interp->ceval.interp_eval_breaker);

     // next/prev are set in add_threadstate().
     assert(tstate->next == NULL);
@@ -1987,8 +1988,7 @@ park_detached_threads(struct _stoptheworld_state *stw)
             }
         }
         else if (state == _Py_THREAD_ATTACHED && t != stw->requester) {
-            // TODO: set this per-thread, rather than per-interpreter.
-            _Py_set_eval_breaker_bit(t->interp, _PY_EVAL_PLEASE_STOP_BIT, 1);
+            _PyThreadState_Signal(t, _PY_EVAL_PLEASE_STOP_BIT);
         }
     }
     stw->thread_countdown -= num_parked;
@@ -2152,18 +2152,41 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
              * deadlock, we need to release head_mutex before
              * the decref.
              */
-            PyObject *old_exc = tstate->async_exc;
-            tstate->async_exc = Py_XNewRef(exc);
+            Py_XINCREF(exc);
+            PyObject *old_exc = _Py_atomic_exchange_ptr(&tstate->async_exc, exc);
             HEAD_UNLOCK(runtime);

             Py_XDECREF(old_exc);
-            _PyEval_SignalAsyncExc(tstate->interp);
+            _PyThreadState_Signal(tstate, _PY_ASYNC_EXCEPTION_BIT);
             return 1;
         }
     }
     HEAD_UNLOCK(runtime);
     return 0;
 }

+void
+_PyInterpreterState_SignalAll(PyInterpreterState *interp, uintptr_t bit)
+{
+    _PyRuntimeState *runtime = &_PyRuntime;
+
+    HEAD_LOCK(runtime);
+    for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
+        _PyThreadState_Signal(tstate, bit);
+    }
+    HEAD_UNLOCK(runtime);
+}
+
+void
+_PyInterpreterState_UnsignalAll(PyInterpreterState *interp, uintptr_t bit)
+{
+    _PyRuntimeState *runtime = &_PyRuntime;
+
+    HEAD_LOCK(runtime);
+    for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) {
+        _PyThreadState_Unsignal(tstate, bit);
+    }
+    HEAD_UNLOCK(runtime);
+}

 //---------------------------------
 // API for the current thread state
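Taken together, the producer side (the _PyThreadState_Signal calls above) and the consumer side (CHECK_EVAL_BREAKER dispatching into _Py_HandlePending) form a simple flags protocol. A compressed standalone model of both halves, with illustrative names rather than CPython APIs:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SIGNALS_PENDING (1U << 1)
    #define GC_SCHEDULED    (1U << 4)
    #define EVENTS_MASK     ((1U << 8) - 1)

    static _Atomic uintptr_t eval_breaker;  /* one per thread in CPython */

    /* Consumer: clear each flag before handling it, as the runtime does,
     * so that a re-signal arriving during handling is not lost. */
    static void handle_pending(void)
    {
        uintptr_t breaker = atomic_load_explicit(&eval_breaker,
                                                 memory_order_relaxed);
        if (breaker & SIGNALS_PENDING) {
            atomic_fetch_and(&eval_breaker, ~(uintptr_t)SIGNALS_PENDING);
            puts("run signal handlers");
        }
        if (breaker & GC_SCHEDULED) {
            atomic_fetch_and(&eval_breaker, ~(uintptr_t)GC_SCHEDULED);
            puts("run a garbage collection");
        }
    }

    int main(void)
    {
        /* Producer: another thread (or a signal handler) sets a bit. */
        atomic_fetch_or(&eval_breaker, GC_SCHEDULED);

        /* The eval loop's cheap check, as in CHECK_EVAL_BREAKER. */
        if (atomic_load(&eval_breaker) & EVENTS_MASK) {
            handle_pending();
        }
        return 0;
    }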