Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-126644: Fix various thread safety issues in _interpreters #126696

Closed
Closed
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f18939a
Initial fix.
ZeroIntensity Nov 10, 2024
756bf41
Remove junk file.
ZeroIntensity Nov 10, 2024
45c4561
Merge branch 'main' of https://github.com/python/cpython into interp-…
ZeroIntensity Nov 10, 2024
a94eaef
Don't overwrite the main thread.
ZeroIntensity Nov 10, 2024
206b581
Small changes.
ZeroIntensity Nov 10, 2024
286e536
Put lock around exception capturing.
ZeroIntensity Nov 10, 2024
2fab7af
Add a comment.
ZeroIntensity Nov 10, 2024
446abc1
Add blurb.
ZeroIntensity Nov 10, 2024
cc36e8d
Add a test.
ZeroIntensity Nov 10, 2024
e25587e
Skip test on the default build.
ZeroIntensity Nov 10, 2024
e4b2c79
Fix and move tests.
ZeroIntensity Nov 10, 2024
aeb483e
Somehow, the locks are re-entrant.
ZeroIntensity Nov 10, 2024
92b3ea7
Don't rely on the interpreter state for _PyXI_ERR_ALREADY_RUNNING
ZeroIntensity Nov 11, 2024
804c41e
Throw an exception if there's a remaining thread state.
ZeroIntensity Nov 11, 2024
2edfda3
Remove newline.
ZeroIntensity Nov 11, 2024
bb8feee
Remove another newline.
ZeroIntensity Nov 11, 2024
5968448
Add docstrings.
ZeroIntensity Nov 11, 2024
9ab979d
Protect _PyIndexPool_AllocIndex with the runtime lock.
ZeroIntensity Nov 11, 2024
b478375
Bump the thread count.
ZeroIntensity Nov 11, 2024
f393ab6
Add comments to the test.
ZeroIntensity Nov 11, 2024
35323f8
Re-enable subinterpreter tests for the free-threaded build.
ZeroIntensity Nov 11, 2024
67df772
Fix tests.
ZeroIntensity Nov 11, 2024
13e96de
Lock runtime instead of using pyatomic
ZeroIntensity Nov 11, 2024
c9e7b08
Fix failing builds.
ZeroIntensity Nov 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Include/internal/pycore_crossinterp.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ typedef enum error_code {
_PyXI_ERR_MAIN_NS_FAILURE = -5,
_PyXI_ERR_APPLY_NS_FAILURE = -6,
_PyXI_ERR_NOT_SHAREABLE = -7,
_PyXI_ERR_SHUTTING_DOWN = -8,
} _PyXI_errcode;


Expand Down
17 changes: 17 additions & 0 deletions Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,18 @@ struct _is {
or the size specified by the THREAD_STACK_SIZE macro. */
/* Used in Python/thread.c. */
size_t stacksize;
#ifdef Py_GIL_DISABLED
/*
* Whether setting the main thread should be prevented.
* This stops any data race problems during interpreter
* finalization, because it ensures that no other threads
* can be trying to access the subinterpreter on the free-threaded
* build.
*
* (See GH-126644)
*/
int prevented;
#endif
} threads;

/* Reference to the _PyRuntime global variable. This field exists
Expand Down Expand Up @@ -403,6 +415,11 @@ PyAPI_FUNC(PyStatus) _PyInterpreterState_New(
PyThreadState *tstate,
PyInterpreterState **pinterp);

PyAPI_FUNC(int)
_PyInterpreterState_PreventMain(PyInterpreterState *interp);

PyAPI_FUNC(int)
_PyInterpreterState_IsRunningAllowed(PyInterpreterState *interp);

#define RARE_EVENT_INTERP_INC(interp, name) \
do { \
Expand Down
27 changes: 27 additions & 0 deletions Lib/test/test_free_threading/test_interpreters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import threading
import unittest

from test import support
from test.support import import_helper
from test.support import threading_helper
_interpreters = import_helper.import_module('_interpreters')


@threading_helper.requires_working_threading()
class StressTests(unittest.TestCase):

@support.requires_resource('cpu')
def test_subinterpreter_thread_safety(self):
# GH-126644: _interpreters had thread safety problems on the free-threaded build
interp = _interpreters.create()
threads = [threading.Thread(target=_interpreters.run_string, args=(interp, "1")) for _ in range(1000)]
threads.extend([threading.Thread(target=_interpreters.destroy, args=(interp,)) for _ in range(1000)])
# This will spam all kinds of subinterpreter errors, but we don't care.
# We just want to make sure that it doesn't crash.
with threading_helper.start_threads(threads):
pass


if __name__ == '__main__':
# Test needs to be a package, so we can do relative imports.
unittest.main()
6 changes: 1 addition & 5 deletions Lib/test/test_interpreters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
import os
from test.support import load_package_tests, Py_GIL_DISABLED
import unittest

if Py_GIL_DISABLED:
raise unittest.SkipTest("GIL disabled")
from test.support import load_package_tests

def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)
4 changes: 3 additions & 1 deletion Lib/test/test_interpreters/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,8 @@ def test_still_running(self):
interp.close()
self.assertTrue(interp.is_running())

# TODO: Figure this out
@unittest.skipIf(support.Py_GIL_DISABLED, "does not work on the free-threaded build")
def test_subthreads_still_running(self):
r_interp, w_interp = self.pipe()
r_thread, w_thread = self.pipe()
Expand All @@ -594,9 +596,9 @@ def task():
os.write({w_interp}, {FINISHED!r})
t = threading.Thread(target=task)
t.start()
t.join()
""")
interp.close()

self.assertEqual(os.read(r_interp, 1), FINISHED)

def test_created_with_capi(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix crash when using subinterpreters in multiple threads on the
:term:`free-threaded <free threading>` build.
25 changes: 25 additions & 0 deletions Modules/_interpretersmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,31 @@ interp_destroy(PyObject *self, PyObject *args, PyObject *kwds)
return NULL;
}

#ifdef Py_GIL_DISABLED
if (_PyInterpreterState_PreventMain(interp) == 0) {
PyErr_SetString(PyExc_InterpreterError,
"interpreter started running during shutdown");
return NULL;
}

if (_Py_atomic_load_ptr_relaxed(&interp->threads.head) != NULL)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't fully understand what you're doing here, but access to the linked list of thread states requires holding HEAD_LOCK().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case is when _PyInterpreterState_PreventMain was able to set the prevention flag, but there was a thread that was able to set the main thread right before that happened, so we can't destroy the interpreter while that thread is still alive (but no more threads will be able to set main after its done).

{
/*
* We're in a weird limbo where the main thread isn't set but
* the thread states haven't fully cleared yet.
*/
PyErr_SetString(PyExc_InterpreterError, "interpreter is still finishing");
return NULL;
}

/*
* Running main has now been prevented, the interpreter will
* never run again (GH-126644).
*/
assert(!is_running_main(interp));
assert(!_PyInterpreterState_IsRunningAllowed(interp));
#endif

// Destroy the interpreter.
_PyXI_EndInterpreter(interp, NULL, NULL);

Expand Down
19 changes: 17 additions & 2 deletions Python/crossinterp.c
Original file line number Diff line number Diff line change
Expand Up @@ -986,8 +986,11 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp)
break;
case _PyXI_ERR_ALREADY_RUNNING:
assert(interp != NULL);
assert(_PyInterpreterState_IsRunningMain(interp));
_PyInterpreterState_FailIfRunningMain(interp);
// We can't use _PyInterpreterState_FailIfRunningMain, because
// it's possible that another thread has already changed the state
// of the interpreter by now.
PyErr_SetString(PyExc_InterpreterError,
"interpreter already running");
break;
case _PyXI_ERR_MAIN_NS_FAILURE:
PyErr_SetString(PyExc_InterpreterError,
Expand All @@ -1000,6 +1003,10 @@ _PyXI_ApplyErrorCode(_PyXI_errcode code, PyInterpreterState *interp)
case _PyXI_ERR_NOT_SHAREABLE:
_set_xid_lookup_failure(interp, NULL, NULL);
break;
case _PyXI_ERR_SHUTTING_DOWN:
PyErr_SetString(PyExc_InterpreterError,
"interpreter is shutting down");
break;
default:
#ifdef Py_DEBUG
Py_UNREACHABLE();
Expand Down Expand Up @@ -1716,9 +1723,17 @@ _PyXI_Enter(_PyXI_session *session,
// In the case where we didn't switch interpreters, it would
// be more efficient to leave the exception in place and return
// immediately. However, life is simpler if we don't.
#ifdef Py_GIL_DISABLED
errcode = _PyInterpreterState_IsRunningAllowed(interp) == 1
? _PyXI_ERR_ALREADY_RUNNING
: _PyXI_ERR_SHUTTING_DOWN;
#else
errcode = _PyXI_ERR_ALREADY_RUNNING;
#endif
goto error;
}

assert(_PyInterpreterState_IsRunningAllowed(interp));
session->running = 1;

// Cache __main__.__dict__.
Expand Down
79 changes: 77 additions & 2 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1047,9 +1047,80 @@ get_main_thread(PyInterpreterState *interp)
return _Py_atomic_load_ptr_relaxed(&interp->threads.main);
}

#define _PyInterpreterState_RUNNING_OK 0
#define _PyInterpreterState_RUNNING_PREVENTED 1

/*
* Is this interpreter allowed to set the main thread anymore?
*/
int
_PyInterpreterState_IsRunningAllowed(PyInterpreterState *interp)
{
assert(interp != NULL);
#ifdef Py_GIL_DISABLED
return _Py_atomic_load_int(&interp->threads.prevented) == _PyInterpreterState_RUNNING_OK;
#else
return 1;
#endif
}

/*
* Atomically prevent future setting of the main thread.
*
* If something goes wrong, then this returns 0 without
* an exception, and 1 otherwise.
*
* On the GIL-ful build, this is a no-op.
*/
int
_PyInterpreterState_PreventMain(PyInterpreterState *interp)
{
assert(interp != NULL);
#ifdef Py_GIL_DISABLED
if (_PyInterpreterState_IsRunningMain(interp))
{
// Interpreter is running, can't prevent it yet.
return 0;
}

int expected_prevented = _PyInterpreterState_RUNNING_OK;
if (_Py_atomic_compare_exchange_int(&interp->threads.prevented,
&expected_prevented,
_PyInterpreterState_RUNNING_PREVENTED) == 0)
{
// Another thread beat us!
return 0;
}

if (_PyInterpreterState_IsRunningMain(interp))
{
// Another thread started right in between the
// prevention period!

// XXX Is having the prevented flag set a problem for a
// running interpreter?
return 0;
}

assert(!_PyInterpreterState_IsRunningAllowed(interp));
assert(!_PyInterpreterState_IsRunningMain(interp));
return 1;
#else
// The GIL protects us on the default build, so this
// case shouldn't be possible.
assert(!_PyInterpreterState_IsRunningMain(interp));
return 1;
#endif
}

int
_PyInterpreterState_SetRunningMain(PyInterpreterState *interp)
{
if (!_PyInterpreterState_IsRunningAllowed(interp))
{
PyErr_SetString(PyExc_RuntimeError, "cannot run this interpreter anymore");
return -1;
}
if (_PyInterpreterState_FailIfRunningMain(interp) < 0) {
return -1;
}
Expand Down Expand Up @@ -1513,15 +1584,19 @@ new_threadstate(PyInterpreterState *interp, int whence)
PyMem_RawFree(new_tstate);
return NULL;
}

/* _Py_ReserveTLBCIndex has thread safety issues */
HEAD_LOCK(runtime);
Comment on lines +1588 to +1589
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem right. _PyIndexPool_AllocIndex locks internally.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, that's what I thought too. I'm not too sure what's going on here, but this segfaults without this lock.


int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp);
if (tlbc_idx < 0) {
PyMem_RawFree(new_tstate);
return NULL;
}
#endif

#else
/* We serialize concurrent creation to protect global state. */
HEAD_LOCK(runtime);
#endif

interp->threads.next_unique_id += 1;
uint64_t id = interp->threads.next_unique_id;
Expand Down
Loading