Skip to content

Commit

Permalink
pythongh-115035: Mark ThreadHandles as non-joinable earlier after for…
Browse files Browse the repository at this point in the history
…king (python#115042)

This marks dead ThreadHandles as non-joinable earlier in
`PyOS_AfterFork_Child()` before we execute any Python code. The handles
are stored in a global linked list in `_PyRuntimeState` because `fork()`
affects the entire process.
  • Loading branch information
colesbury authored Feb 6, 2024
1 parent 71239d5 commit b6228b5
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 41 deletions.
15 changes: 9 additions & 6 deletions Include/internal/pycore_pythread.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern "C" {
#endif

#include "dynamic_annotations.h" // _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX
#include "pycore_llist.h" // struct llist_node

// Get _POSIX_THREADS and _POSIX_SEMAPHORES macros if available
#if (defined(HAVE_UNISTD_H) && !defined(_POSIX_THREADS) \
Expand Down Expand Up @@ -75,14 +76,22 @@ struct _pythread_runtime_state {
struct py_stub_tls_entry tls_entries[PTHREAD_KEYS_MAX];
} stubs;
#endif

// Linked list of ThreadHandleObjects
struct llist_node handles;
};

#define _pythread_RUNTIME_INIT(pythread) \
{ \
.handles = LLIST_INIT(pythread.handles), \
}

#ifdef HAVE_FORK
/* Private function to reinitialize a lock at fork in the child process.
Reset the lock to the unlocked state.
Return 0 on success, return -1 on error. */
extern int _PyThread_at_fork_reinit(PyThread_type_lock *lock);
extern void _PyThread_AfterFork(struct _pythread_runtime_state *state);
#endif /* HAVE_FORK */


Expand Down Expand Up @@ -143,12 +152,6 @@ PyAPI_FUNC(int) PyThread_join_thread(PyThread_handle_t);
*/
PyAPI_FUNC(int) PyThread_detach_thread(PyThread_handle_t);

/*
* Obtain the new thread ident and handle in a forked child process.
*/
PyAPI_FUNC(void) PyThread_update_thread_after_fork(PyThread_ident_t* ident,
PyThread_handle_t* handle);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 2 additions & 0 deletions Include/internal/pycore_runtime_init.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ extern "C" {
#include "pycore_parser.h" // _parser_runtime_state_INIT
#include "pycore_pyhash.h" // pyhash_state_INIT
#include "pycore_pymem_init.h" // _pymem_allocators_standard_INIT
#include "pycore_pythread.h" // _pythread_RUNTIME_INIT
#include "pycore_runtime_init_generated.h" // _Py_bytes_characters_INIT
#include "pycore_signal.h" // _signals_RUNTIME_INIT
#include "pycore_tracemalloc.h" // _tracemalloc_runtime_state_INIT
Expand Down Expand Up @@ -90,6 +91,7 @@ extern PyTypeObject _PyExc_MemoryError;
}, \
.obmalloc = _obmalloc_global_state_INIT, \
.pyhash_state = pyhash_state_INIT, \
.threads = _pythread_RUNTIME_INIT(runtime.threads), \
.signals = _signals_RUNTIME_INIT, \
.interpreters = { \
/* This prevents interpreters from getting created \
Expand Down
5 changes: 1 addition & 4 deletions Lib/threading.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,6 @@ def _after_fork(self, new_ident=None):
# This thread is alive.
self._ident = new_ident
if self._handle is not None:
self._handle.after_fork_alive()
assert self._handle.ident == new_ident
# bpo-42350: If the fork happens when the thread is already stopped
# (ex: after threading._shutdown() has been called), _tstate_lock
Expand All @@ -965,9 +964,7 @@ def _after_fork(self, new_ident=None):
self._is_stopped = True
self._tstate_lock = None
self._join_lock = None
if self._handle is not None:
self._handle.after_fork_dead()
self._handle = None
self._handle = None

def __repr__(self):
assert self._initialized, "Thread.__init__() was not called"
Expand Down
53 changes: 36 additions & 17 deletions Modules/_threadmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ get_thread_state(PyObject *module)

typedef struct {
PyObject_HEAD
struct llist_node node; // linked list node (see _pythread_runtime_state)
PyThread_ident_t ident;
PyThread_handle_t handle;
char joinable;
Expand All @@ -59,13 +60,26 @@ new_thread_handle(thread_module_state* state)
self->ident = 0;
self->handle = 0;
self->joinable = 0;

HEAD_LOCK(&_PyRuntime);
llist_insert_tail(&_PyRuntime.threads.handles, &self->node);
HEAD_UNLOCK(&_PyRuntime);

return self;
}

static void
ThreadHandle_dealloc(ThreadHandleObject *self)
{
PyObject *tp = (PyObject *) Py_TYPE(self);

// Remove ourself from the global list of handles
HEAD_LOCK(&_PyRuntime);
if (self->node.next != NULL) {
llist_remove(&self->node);
}
HEAD_UNLOCK(&_PyRuntime);

if (self->joinable) {
int ret = PyThread_detach_thread(self->handle);
if (ret) {
Expand All @@ -77,6 +91,28 @@ ThreadHandle_dealloc(ThreadHandleObject *self)
Py_DECREF(tp);
}

void
_PyThread_AfterFork(struct _pythread_runtime_state *state)
{
// gh-115035: We mark ThreadHandles as not joinable early in the child's
// after-fork handler. We do this before calling any Python code to ensure
// that it happens before any ThreadHandles are deallocated, such as by a
// GC cycle.
PyThread_ident_t current = PyThread_get_thread_ident_ex();

struct llist_node *node;
llist_for_each_safe(node, &state->handles) {
ThreadHandleObject *hobj = llist_data(node, ThreadHandleObject, node);
if (hobj->ident == current) {
continue;
}

// Disallow calls to detach() and join() as they could crash.
hobj->joinable = 0;
llist_remove(node);
}
}

static PyObject *
ThreadHandle_repr(ThreadHandleObject *self)
{
Expand All @@ -91,21 +127,6 @@ ThreadHandle_get_ident(ThreadHandleObject *self, void *ignored)
}


static PyObject *
ThreadHandle_after_fork_alive(ThreadHandleObject *self, void* ignored)
{
PyThread_update_thread_after_fork(&self->ident, &self->handle);
Py_RETURN_NONE;
}

static PyObject *
ThreadHandle_after_fork_dead(ThreadHandleObject *self, void* ignored)
{
// Disallow calls to detach() and join() as they could crash.
self->joinable = 0;
Py_RETURN_NONE;
}

static PyObject *
ThreadHandle_detach(ThreadHandleObject *self, void* ignored)
{
Expand Down Expand Up @@ -157,8 +178,6 @@ static PyGetSetDef ThreadHandle_getsetlist[] = {

static PyMethodDef ThreadHandle_methods[] =
{
{"after_fork_alive", (PyCFunction)ThreadHandle_after_fork_alive, METH_NOARGS},
{"after_fork_dead", (PyCFunction)ThreadHandle_after_fork_dead, METH_NOARGS},
{"detach", (PyCFunction)ThreadHandle_detach, METH_NOARGS},
{"join", (PyCFunction)ThreadHandle_join, METH_NOARGS},
{0, 0}
Expand Down
2 changes: 2 additions & 0 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
return _PyStatus_NO_MEMORY();
}

_PyThread_AfterFork(&runtime->threads);

return _PyStatus_OK();
}
#endif
Expand Down
4 changes: 0 additions & 4 deletions Python/thread_nt.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,6 @@ PyThread_detach_thread(PyThread_handle_t handle) {
return (CloseHandle(hThread) == 0);
}

void
PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) {
}

/*
* Return the thread Id instead of a handle. The Id is said to uniquely identify the
* thread in the system
Expand Down
10 changes: 0 additions & 10 deletions Python/thread_pthread.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,16 +339,6 @@ PyThread_detach_thread(PyThread_handle_t th) {
return pthread_detach((pthread_t) th);
}

void
PyThread_update_thread_after_fork(PyThread_ident_t* ident, PyThread_handle_t* handle) {
// The thread id might have been updated in the forked child
pthread_t th = pthread_self();
*ident = (PyThread_ident_t) th;
*handle = (PyThread_handle_t) th;
assert(th == (pthread_t) *ident);
assert(th == (pthread_t) *handle);
}

/* XXX This implementation is considered (to quote Tim Peters) "inherently
hosed" because:
- It does not guarantee the promise that a non-zero integer is returned.
Expand Down

0 comments on commit b6228b5

Please sign in to comment.