From b0f278ff0551b06191cec01445c577e3b25570da Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 11 Dec 2024 16:06:07 +0100 Subject: [PATCH] gh-127065: Make methodcaller thread-safe and re-entrant (GH-127746) The function `operator.methodcaller` was not thread-safe since the additional of the vectorcall method in gh-89013. In the free threading build the issue is easy to trigger, for the normal build harder. This makes the `methodcaller` safe by: * Replacing the lazy initialization with initialization in the constructor. * Using a stack allocated space for the vectorcall arguments and falling back to `tp_call` for calls with more than 8 arguments. --- .../test_free_threading/test_methodcaller.py | 33 ++++ Lib/test/test_operator.py | 13 ++ ...-12-01-22-28-41.gh-issue-127065.tFpRer.rst | 1 + Modules/_operator.c | 180 ++++++++---------- 4 files changed, 131 insertions(+), 96 deletions(-) create mode 100644 Lib/test/test_free_threading/test_methodcaller.py create mode 100644 Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst diff --git a/Lib/test/test_free_threading/test_methodcaller.py b/Lib/test/test_free_threading/test_methodcaller.py new file mode 100644 index 00000000000000..8846b0010012f2 --- /dev/null +++ b/Lib/test/test_free_threading/test_methodcaller.py @@ -0,0 +1,33 @@ +import unittest +from threading import Thread +from test.support import threading_helper +from operator import methodcaller + + +class TestMethodcaller(unittest.TestCase): + def test_methodcaller_threading(self): + number_of_threads = 10 + size = 4_000 + + mc = methodcaller("append", 2) + + def work(mc, l, ii): + for _ in range(ii): + mc(l) + + worker_threads = [] + lists = [] + for ii in range(number_of_threads): + l = [] + lists.append(l) + worker_threads.append(Thread(target=work, args=[mc, l, size])) + for t in worker_threads: + t.start() + for t in worker_threads: + t.join() + for l in lists: + assert len(l) == size + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py index 812d46482e238a..82578a0ef1e6f2 100644 --- a/Lib/test/test_operator.py +++ b/Lib/test/test_operator.py @@ -482,6 +482,8 @@ def bar(self, f=42): return f def baz(*args, **kwds): return kwds['name'], kwds['self'] + def return_arguments(self, *args, **kwds): + return args, kwds a = A() f = operator.methodcaller('foo') self.assertRaises(IndexError, f, a) @@ -498,6 +500,17 @@ def baz(*args, **kwds): f = operator.methodcaller('baz', name='spam', self='eggs') self.assertEqual(f(a), ('spam', 'eggs')) + many_positional_arguments = tuple(range(10)) + many_kw_arguments = dict(zip('abcdefghij', range(10))) + f = operator.methodcaller('return_arguments', *many_positional_arguments) + self.assertEqual(f(a), (many_positional_arguments, {})) + + f = operator.methodcaller('return_arguments', **many_kw_arguments) + self.assertEqual(f(a), ((), many_kw_arguments)) + + f = operator.methodcaller('return_arguments', *many_positional_arguments, **many_kw_arguments) + self.assertEqual(f(a), (many_positional_arguments, many_kw_arguments)) + def test_inplace(self): operator = self.module class C(object): diff --git a/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst b/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst new file mode 100644 index 00000000000000..03d6953b9ddfa1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-01-22-28-41.gh-issue-127065.tFpRer.rst @@ -0,0 +1 @@ +Make :func:`operator.methodcaller` thread-safe and re-entrant safe. diff --git a/Modules/_operator.c b/Modules/_operator.c index 6c1945174ab7cd..ce3ef015710223 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1595,78 +1595,75 @@ static PyType_Spec attrgetter_type_spec = { typedef struct { PyObject_HEAD PyObject *name; - PyObject *xargs; // reference to arguments passed in constructor + PyObject *args; PyObject *kwds; - PyObject **vectorcall_args; /* Borrowed references */ + PyObject *vectorcall_args; PyObject *vectorcall_kwnames; vectorcallfunc vectorcall; } methodcallerobject; -#ifndef Py_GIL_DISABLED -static int _methodcaller_initialize_vectorcall(methodcallerobject* mc) -{ - PyObject* args = mc->xargs; - PyObject* kwds = mc->kwds; - - Py_ssize_t nargs = PyTuple_GET_SIZE(args); - assert(nargs > 0); - mc->vectorcall_args = PyMem_Calloc( - nargs + (kwds ? PyDict_Size(kwds) : 0), - sizeof(PyObject*)); - if (!mc->vectorcall_args) { - PyErr_NoMemory(); - return -1; - } - /* The first item of vectorcall_args will be filled with obj later */ - if (nargs > 1) { - memcpy(mc->vectorcall_args, PySequence_Fast_ITEMS(args), - nargs * sizeof(PyObject*)); - } - if (kwds) { - const Py_ssize_t nkwds = PyDict_Size(kwds); - - mc->vectorcall_kwnames = PyTuple_New(nkwds); - if (!mc->vectorcall_kwnames) { - return -1; - } - Py_ssize_t i = 0, ppos = 0; - PyObject* key, * value; - while (PyDict_Next(kwds, &ppos, &key, &value)) { - PyTuple_SET_ITEM(mc->vectorcall_kwnames, i, Py_NewRef(key)); - mc->vectorcall_args[nargs + i] = value; // borrowed reference - ++i; - } - } - else { - mc->vectorcall_kwnames = NULL; - } - return 1; -} +#define _METHODCALLER_MAX_ARGS 8 static PyObject * -methodcaller_vectorcall( - methodcallerobject *mc, PyObject *const *args, size_t nargsf, PyObject* kwnames) +methodcaller_vectorcall(methodcallerobject *mc, PyObject *const *args, + size_t nargsf, PyObject* kwnames) { if (!_PyArg_CheckPositional("methodcaller", PyVectorcall_NARGS(nargsf), 1, 1) || !_PyArg_NoKwnames("methodcaller", kwnames)) { return NULL; } - if (mc->vectorcall_args == NULL) { - if (_methodcaller_initialize_vectorcall(mc) < 0) { - return NULL; - } - } + assert(mc->vectorcall_args != NULL); + + PyObject *tmp_args[_METHODCALLER_MAX_ARGS]; + tmp_args[0] = args[0]; + assert(1 + PyTuple_GET_SIZE(mc->vectorcall_args) <= _METHODCALLER_MAX_ARGS); + memcpy(tmp_args + 1, _PyTuple_ITEMS(mc->vectorcall_args), sizeof(PyObject *) * PyTuple_GET_SIZE(mc->vectorcall_args)); - assert(mc->vectorcall_args != 0); - mc->vectorcall_args[0] = args[0]; - return PyObject_VectorcallMethod( - mc->name, mc->vectorcall_args, - (PyTuple_GET_SIZE(mc->xargs)) | PY_VECTORCALL_ARGUMENTS_OFFSET, + return PyObject_VectorcallMethod(mc->name, tmp_args, + (1 + PyTuple_GET_SIZE(mc->args)) | PY_VECTORCALL_ARGUMENTS_OFFSET, mc->vectorcall_kwnames); } -#endif +static int +_methodcaller_initialize_vectorcall(methodcallerobject* mc) +{ + PyObject* args = mc->args; + PyObject* kwds = mc->kwds; + + if (kwds && PyDict_Size(kwds)) { + PyObject *values = PyDict_Values(kwds); + if (!values) { + return -1; + } + PyObject *values_tuple = PySequence_Tuple(values); + Py_DECREF(values); + if (!values_tuple) { + return -1; + } + if (PyTuple_GET_SIZE(args)) { + mc->vectorcall_args = PySequence_Concat(args, values_tuple); + Py_DECREF(values_tuple); + if (mc->vectorcall_args == NULL) { + return -1; + } + } + else { + mc->vectorcall_args = values_tuple; + } + mc->vectorcall_kwnames = PySequence_Tuple(kwds); + if (!mc->vectorcall_kwnames) { + return -1; + } + } + else { + mc->vectorcall_args = Py_NewRef(args); + mc->vectorcall_kwnames = NULL; + } + + mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; + return 0; +} /* AC 3.5: variable number of arguments, not currently support by AC */ static PyObject * @@ -1694,25 +1691,30 @@ methodcaller_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (mc == NULL) { return NULL; } + mc->vectorcall = NULL; + mc->vectorcall_args = NULL; + mc->vectorcall_kwnames = NULL; + mc->kwds = Py_XNewRef(kwds); Py_INCREF(name); PyInterpreterState *interp = _PyInterpreterState_GET(); _PyUnicode_InternMortal(interp, &name); mc->name = name; - mc->xargs = Py_XNewRef(args); // allows us to use borrowed references - mc->kwds = Py_XNewRef(kwds); - mc->vectorcall_args = 0; - + mc->args = PyTuple_GetSlice(args, 1, PyTuple_GET_SIZE(args)); + if (mc->args == NULL) { + Py_DECREF(mc); + return NULL; + } -#ifdef Py_GIL_DISABLED - // gh-127065: The current implementation of methodcaller_vectorcall - // is not thread-safe because it modifies the `vectorcall_args` array, - // which is shared across calls. - mc->vectorcall = NULL; -#else - mc->vectorcall = (vectorcallfunc)methodcaller_vectorcall; -#endif + Py_ssize_t vectorcall_size = PyTuple_GET_SIZE(args) + + (kwds ? PyDict_Size(kwds) : 0); + if (vectorcall_size < (_METHODCALLER_MAX_ARGS)) { + if (_methodcaller_initialize_vectorcall(mc) < 0) { + Py_DECREF(mc); + return NULL; + } + } PyObject_GC_Track(mc); return (PyObject *)mc; @@ -1722,13 +1724,10 @@ static void methodcaller_clear(methodcallerobject *mc) { Py_CLEAR(mc->name); - Py_CLEAR(mc->xargs); + Py_CLEAR(mc->args); Py_CLEAR(mc->kwds); - if (mc->vectorcall_args != NULL) { - PyMem_Free(mc->vectorcall_args); - mc->vectorcall_args = 0; - Py_CLEAR(mc->vectorcall_kwnames); - } + Py_CLEAR(mc->vectorcall_args); + Py_CLEAR(mc->vectorcall_kwnames); } static void @@ -1745,8 +1744,10 @@ static int methodcaller_traverse(methodcallerobject *mc, visitproc visit, void *arg) { Py_VISIT(mc->name); - Py_VISIT(mc->xargs); + Py_VISIT(mc->args); Py_VISIT(mc->kwds); + Py_VISIT(mc->vectorcall_args); + Py_VISIT(mc->vectorcall_kwnames); Py_VISIT(Py_TYPE(mc)); return 0; } @@ -1765,15 +1766,7 @@ methodcaller_call(methodcallerobject *mc, PyObject *args, PyObject *kw) if (method == NULL) return NULL; - - PyObject *cargs = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); - if (cargs == NULL) { - Py_DECREF(method); - return NULL; - } - - result = PyObject_Call(method, cargs, mc->kwds); - Py_DECREF(cargs); + result = PyObject_Call(method, mc->args, mc->kwds); Py_DECREF(method); return result; } @@ -1791,7 +1784,7 @@ methodcaller_repr(methodcallerobject *mc) } numkwdargs = mc->kwds != NULL ? PyDict_GET_SIZE(mc->kwds) : 0; - numposargs = PyTuple_GET_SIZE(mc->xargs) - 1; + numposargs = PyTuple_GET_SIZE(mc->args); numtotalargs = numposargs + numkwdargs; if (numtotalargs == 0) { @@ -1807,7 +1800,7 @@ methodcaller_repr(methodcallerobject *mc) } for (i = 0; i < numposargs; ++i) { - PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->xargs, i+1)); + PyObject *onerepr = PyObject_Repr(PyTuple_GET_ITEM(mc->args, i)); if (onerepr == NULL) goto done; PyTuple_SET_ITEM(argreprs, i, onerepr); @@ -1859,14 +1852,14 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) { if (!mc->kwds || PyDict_GET_SIZE(mc->kwds) == 0) { Py_ssize_t i; - Py_ssize_t newarg_size = PyTuple_GET_SIZE(mc->xargs); - PyObject *newargs = PyTuple_New(newarg_size); + Py_ssize_t callargcount = PyTuple_GET_SIZE(mc->args); + PyObject *newargs = PyTuple_New(1 + callargcount); if (newargs == NULL) return NULL; PyTuple_SET_ITEM(newargs, 0, Py_NewRef(mc->name)); - for (i = 1; i < newarg_size; ++i) { - PyObject *arg = PyTuple_GET_ITEM(mc->xargs, i); - PyTuple_SET_ITEM(newargs, i, Py_NewRef(arg)); + for (i = 0; i < callargcount; ++i) { + PyObject *arg = PyTuple_GET_ITEM(mc->args, i); + PyTuple_SET_ITEM(newargs, i + 1, Py_NewRef(arg)); } return Py_BuildValue("ON", Py_TYPE(mc), newargs); } @@ -1884,12 +1877,7 @@ methodcaller_reduce(methodcallerobject *mc, PyObject *Py_UNUSED(ignored)) constructor = PyObject_VectorcallDict(partial, newargs, 2, mc->kwds); Py_DECREF(partial); - PyObject *args = PyTuple_GetSlice(mc->xargs, 1, PyTuple_GET_SIZE(mc->xargs)); - if (!args) { - Py_DECREF(constructor); - return NULL; - } - return Py_BuildValue("NO", constructor, args); + return Py_BuildValue("NO", constructor, mc->args); } }