From bbee57fa8c318cb26d6c8651254927a1972c9738 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 21 Mar 2024 09:56:12 -0600 Subject: [PATCH 01/39] gh-76785: Clean Up Interpreter ID Conversions (gh-117048) Mostly we unify the two different implementations of the conversion code (from PyObject * to int64_t). We also drop the PyArg_ParseTuple()-style converter function, as well as rename and move PyInterpreterID_LookUp(). --- Include/cpython/interpreteridobject.h | 5 +- Include/internal/pycore_interp.h | 3 + Lib/test/test_capi/test_misc.py | 4 +- Modules/_testcapimodule.c | 26 ------- Modules/_testinternalcapi.c | 32 +++++++- Modules/_xxsubinterpretersmodule.c | 82 +------------------- Objects/interpreteridobject.c | 66 ++++++----------- Python/pystate.c | 103 ++++++++++++++++++++------ 8 files changed, 143 insertions(+), 178 deletions(-) diff --git a/Include/cpython/interpreteridobject.h b/Include/cpython/interpreteridobject.h index 4ab9ad5d315f80..d425c909806e44 100644 --- a/Include/cpython/interpreteridobject.h +++ b/Include/cpython/interpreteridobject.h @@ -8,4 +8,7 @@ PyAPI_DATA(PyTypeObject) PyInterpreterID_Type; PyAPI_FUNC(PyObject *) PyInterpreterID_New(int64_t); PyAPI_FUNC(PyObject *) PyInterpreterState_GetIDObject(PyInterpreterState *); -PyAPI_FUNC(PyInterpreterState *) PyInterpreterID_LookUp(PyObject *); + +#ifdef Py_BUILD_CORE +extern int64_t _PyInterpreterID_GetID(PyObject *); +#endif diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 942f47340b3966..b28e8a3ff45f3f 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -295,8 +295,11 @@ _PyInterpreterState_SetFinalizing(PyInterpreterState *interp, PyThreadState *tst } +extern int64_t _PyInterpreterState_ObjectToID(PyObject *); + // Export for the _xxinterpchannels module. 
PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpID(int64_t); +PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpIDObject(PyObject *); PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 5b4f67e7f5f58d..fe5e19d46d8b6c 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2303,7 +2303,7 @@ def test_equality(self): def test_linked_lifecycle(self): id1 = _interpreters.create() - _testcapi.unlink_interpreter_refcount(id1) + _testinternalcapi.unlink_interpreter_refcount(id1) self.assertEqual( _testinternalcapi.get_interpreter_refcount(id1), 0) @@ -2319,7 +2319,7 @@ def test_linked_lifecycle(self): _testinternalcapi.get_interpreter_refcount(id1), 0) - _testcapi.link_interpreter_refcount(id1) + _testinternalcapi.link_interpreter_refcount(id1) self.assertEqual( _testinternalcapi.get_interpreter_refcount(id1), 0) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index b73085bb8f67ce..e68d083955d64a 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1455,30 +1455,6 @@ get_interpreterid_type(PyObject *self, PyObject *Py_UNUSED(ignored)) return Py_NewRef(&PyInterpreterID_Type); } -static PyObject * -link_interpreter_refcount(PyObject *self, PyObject *idobj) -{ - PyInterpreterState *interp = PyInterpreterID_LookUp(idobj); - if (interp == NULL) { - assert(PyErr_Occurred()); - return NULL; - } - _PyInterpreterState_RequireIDRef(interp, 1); - Py_RETURN_NONE; -} - -static PyObject * -unlink_interpreter_refcount(PyObject *self, PyObject *idobj) -{ - PyInterpreterState *interp = PyInterpreterID_LookUp(idobj); - if (interp == NULL) { - assert(PyErr_Occurred()); - return NULL; - } - _PyInterpreterState_RequireIDRef(interp, 0); - Py_RETURN_NONE; -} - static PyMethodDef ml; static PyObject * @@ -3324,8 +3300,6 @@ static 
PyMethodDef TestMethods[] = { {"test_current_tstate_matches", test_current_tstate_matches, METH_NOARGS}, {"run_in_subinterp", run_in_subinterp, METH_VARARGS}, {"get_interpreterid_type", get_interpreterid_type, METH_NOARGS}, - {"link_interpreter_refcount", link_interpreter_refcount, METH_O}, - {"unlink_interpreter_refcount", unlink_interpreter_refcount, METH_O}, {"create_cfunction", create_cfunction, METH_NOARGS}, {"call_in_temporary_c_thread", call_in_temporary_c_thread, METH_VARARGS, PyDoc_STR("set_error_class(error_class) -> None")}, diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 1c10dd02138f3a..f73a29e5afe801 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -29,8 +29,6 @@ #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pystate.h" // _PyThreadState_GET() -#include "interpreteridobject.h" // PyInterpreterID_LookUp() - #include "clinic/_testinternalcapi.c.h" // Include test definitions from _testinternalcapi/ @@ -1112,7 +1110,7 @@ pending_identify(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "O:pending_identify", &interpid)) { return NULL; } - PyInterpreterState *interp = PyInterpreterID_LookUp(interpid); + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(interpid); if (interp == NULL) { if (!PyErr_Occurred()) { PyErr_SetString(PyExc_ValueError, "interpreter not found"); @@ -1480,13 +1478,37 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) static PyObject * get_interpreter_refcount(PyObject *self, PyObject *idobj) { - PyInterpreterState *interp = PyInterpreterID_LookUp(idobj); + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(idobj); if (interp == NULL) { return NULL; } return PyLong_FromLongLong(interp->id_refcount); } +static PyObject * +link_interpreter_refcount(PyObject *self, PyObject *idobj) +{ + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(idobj); + if (interp == NULL) { + 
assert(PyErr_Occurred()); + return NULL; + } + _PyInterpreterState_RequireIDRef(interp, 1); + Py_RETURN_NONE; +} + +static PyObject * +unlink_interpreter_refcount(PyObject *self, PyObject *idobj) +{ + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(idobj); + if (interp == NULL) { + assert(PyErr_Occurred()); + return NULL; + } + _PyInterpreterState_RequireIDRef(interp, 0); + Py_RETURN_NONE; +} + static void _xid_capsule_destructor(PyObject *capsule) @@ -1728,6 +1750,8 @@ static PyMethodDef module_functions[] = { _PyCFunction_CAST(run_in_subinterp_with_config), METH_VARARGS | METH_KEYWORDS}, {"get_interpreter_refcount", get_interpreter_refcount, METH_O}, + {"link_interpreter_refcount", link_interpreter_refcount, METH_O}, + {"unlink_interpreter_refcount", unlink_interpreter_refcount, METH_O}, {"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS}, {"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS}, {"get_crossinterp_data", get_crossinterp_data, METH_VARARGS}, diff --git a/Modules/_xxsubinterpretersmodule.c b/Modules/_xxsubinterpretersmodule.c index 28c2f9c08bc0da..606b2a36481ce2 100644 --- a/Modules/_xxsubinterpretersmodule.c +++ b/Modules/_xxsubinterpretersmodule.c @@ -35,83 +35,8 @@ _get_current_interp(void) return PyInterpreterState_Get(); } -static int64_t -pylong_to_interpid(PyObject *idobj) -{ - assert(PyLong_CheckExact(idobj)); - - if (_PyLong_IsNegative((PyLongObject *)idobj)) { - PyErr_Format(PyExc_ValueError, - "interpreter ID must be a non-negative int, got %R", - idobj); - return -1; - } - - int overflow; - long long id = PyLong_AsLongLongAndOverflow(idobj, &overflow); - if (id == -1) { - if (!overflow) { - assert(PyErr_Occurred()); - return -1; - } - assert(!PyErr_Occurred()); - // For now, we don't worry about if LLONG_MAX < INT64_MAX. 
- goto bad_id; - } -#if LLONG_MAX > INT64_MAX - if (id > INT64_MAX) { - goto bad_id; - } -#endif - return (int64_t)id; - -bad_id: - PyErr_Format(PyExc_RuntimeError, - "unrecognized interpreter ID %O", idobj); - return -1; -} - -static int64_t -convert_interpid_obj(PyObject *arg) -{ - int64_t id = -1; - if (_PyIndex_Check(arg)) { - PyObject *idobj = PyNumber_Long(arg); - if (idobj == NULL) { - return -1; - } - id = pylong_to_interpid(idobj); - Py_DECREF(idobj); - if (id < 0) { - return -1; - } - } - else { - PyErr_Format(PyExc_TypeError, - "interpreter ID must be an int, got %.100s", - Py_TYPE(arg)->tp_name); - return -1; - } - return id; -} - -static PyInterpreterState * -look_up_interp(PyObject *arg) -{ - int64_t id = convert_interpid_obj(arg); - if (id < 0) { - return NULL; - } - return _PyInterpreterState_LookUpID(id); -} - +#define look_up_interp _PyInterpreterState_LookUpIDObject -static PyObject * -interpid_to_pylong(int64_t id) -{ - assert(id < LLONG_MAX); - return PyLong_FromLongLong(id); -} static PyObject * get_interpid_obj(PyInterpreterState *interp) @@ -123,7 +48,8 @@ get_interpid_obj(PyInterpreterState *interp) if (id < 0) { return NULL; } - return interpid_to_pylong(id); + assert(id < LLONG_MAX); + return PyLong_FromLongLong(id); } static PyObject * @@ -699,7 +625,7 @@ interp_set___main___attrs(PyObject *self, PyObject *args) } // Look up the interpreter. 
- PyInterpreterState *interp = PyInterpreterID_LookUp(id); + PyInterpreterState *interp = look_up_interp(id); if (interp == NULL) { return NULL; } diff --git a/Objects/interpreteridobject.c b/Objects/interpreteridobject.c index 16e27b64c0c9c2..4844d6a9bf781c 100644 --- a/Objects/interpreteridobject.c +++ b/Objects/interpreteridobject.c @@ -1,8 +1,7 @@ /* InterpreterID object */ #include "Python.h" -#include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_interp.h" // _PyInterpreterState_LookUpID() +#include "pycore_interp.h" // _PyInterpreterState_LookUpID() #include "interpreteridobject.h" @@ -11,6 +10,21 @@ typedef struct interpid { int64_t id; } interpid; +int64_t +_PyInterpreterID_GetID(PyObject *self) +{ + if (!PyObject_TypeCheck(self, &PyInterpreterID_Type)) { + PyErr_Format(PyExc_TypeError, + "expected an InterpreterID, got %R", + self); + return -1; + + } + int64_t id = ((interpid *)self)->id; + assert(id >= 0); + return id; +} + static interpid * newinterpid(PyTypeObject *cls, int64_t id, int force) { @@ -42,43 +56,19 @@ newinterpid(PyTypeObject *cls, int64_t id, int force) return self; } -static int -interp_id_converter(PyObject *arg, void *ptr) -{ - int64_t id; - if (PyObject_TypeCheck(arg, &PyInterpreterID_Type)) { - id = ((interpid *)arg)->id; - } - else if (_PyIndex_Check(arg)) { - id = PyLong_AsLongLong(arg); - if (id == -1 && PyErr_Occurred()) { - return 0; - } - if (id < 0) { - PyErr_Format(PyExc_ValueError, - "interpreter ID must be a non-negative int, got %R", arg); - return 0; - } - } - else { - PyErr_Format(PyExc_TypeError, - "interpreter ID must be an int, got %.100s", - Py_TYPE(arg)->tp_name); - return 0; - } - *(int64_t *)ptr = id; - return 1; -} - static PyObject * interpid_new(PyTypeObject *cls, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"id", "force", NULL}; - int64_t id; + PyObject *idobj; int force = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O&|$p:InterpreterID.__init__", kwlist, - interp_id_converter, 
&id, &force)) { + "O|$p:InterpreterID.__init__", kwlist, + &idobj, &force)) { + return NULL; + } + int64_t id = _PyInterpreterState_ObjectToID(idobj); + if (id < 0) { return NULL; } @@ -282,13 +272,3 @@ PyInterpreterState_GetIDObject(PyInterpreterState *interp) } return (PyObject *)newinterpid(&PyInterpreterID_Type, id, 0); } - -PyInterpreterState * -PyInterpreterID_LookUp(PyObject *requested_id) -{ - int64_t id; - if (!interp_id_converter(requested_id, &id)) { - return NULL; - } - return _PyInterpreterState_LookUpID(id); -} diff --git a/Python/pystate.c b/Python/pystate.c index 5a334e8721e63b..5332b8a827d7e8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2,6 +2,8 @@ /* Thread and interpreter state structures and their interfaces */ #include "Python.h" +#include "interpreteridobject.h" // PyInterpreterID_Type +#include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_ceval.h" #include "pycore_code.h" // stats #include "pycore_critical_section.h" // _PyCriticalSection_Resume() @@ -1064,6 +1066,73 @@ _PyInterpreterState_FailIfRunningMain(PyInterpreterState *interp) // accessors //---------- +PyObject * +PyUnstable_InterpreterState_GetMainModule(PyInterpreterState *interp) +{ + PyObject *modules = _PyImport_GetModules(interp); + if (modules == NULL) { + PyErr_SetString(PyExc_RuntimeError, "interpreter not initialized"); + return NULL; + } + return PyMapping_GetItemString(modules, "__main__"); +} + +PyObject * +PyInterpreterState_GetDict(PyInterpreterState *interp) +{ + if (interp->dict == NULL) { + interp->dict = PyDict_New(); + if (interp->dict == NULL) { + PyErr_Clear(); + } + } + /* Returning NULL means no per-interpreter dict is available. 
*/ + return interp->dict; +} + + +//---------- +// interp ID +//---------- + +int64_t +_PyInterpreterState_ObjectToID(PyObject *idobj) +{ + if (PyObject_TypeCheck(idobj, &PyInterpreterID_Type)) { + return _PyInterpreterID_GetID(idobj); + } + + if (!_PyIndex_Check(idobj)) { + PyErr_Format(PyExc_TypeError, + "interpreter ID must be an int, got %.100s", + Py_TYPE(idobj)->tp_name); + return -1; + } + + // This may raise OverflowError. + // For now, we don't worry about if LLONG_MAX < INT64_MAX. + long long id = PyLong_AsLongLong(idobj); + if (id == -1 && PyErr_Occurred()) { + return -1; + } + + if (id < 0) { + PyErr_Format(PyExc_ValueError, + "interpreter ID must be a non-negative int, got %R", + idobj); + return -1; + } +#if LLONG_MAX > INT64_MAX + else if (id > INT64_MAX) { + PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + return -1; + } +#endif + else { + return (int64_t)id; + } +} + int64_t PyInterpreterState_GetID(PyInterpreterState *interp) { @@ -1142,30 +1211,6 @@ _PyInterpreterState_RequireIDRef(PyInterpreterState *interp, int required) interp->requires_idref = required ? 1 : 0; } -PyObject * -PyUnstable_InterpreterState_GetMainModule(PyInterpreterState *interp) -{ - PyObject *modules = _PyImport_GetModules(interp); - if (modules == NULL) { - PyErr_SetString(PyExc_RuntimeError, "interpreter not initialized"); - return NULL; - } - return PyMapping_GetItemString(modules, "__main__"); -} - -PyObject * -PyInterpreterState_GetDict(PyInterpreterState *interp) -{ - if (interp->dict == NULL) { - interp->dict = PyDict_New(); - if (interp->dict == NULL) { - PyErr_Clear(); - } - } - /* Returning NULL means no per-interpreter dict is available. 
*/ - return interp->dict; -} - //----------------------------- // look up an interpreter state @@ -1227,6 +1272,16 @@ _PyInterpreterState_LookUpID(int64_t requested_id) return interp; } +PyInterpreterState * +_PyInterpreterState_LookUpIDObject(PyObject *requested_id) +{ + int64_t id = _PyInterpreterState_ObjectToID(requested_id); + if (id < 0) { + return NULL; + } + return _PyInterpreterState_LookUpID(id); +} + /********************************/ /* the per-thread runtime state */ From 5a76d1be8ef371b75ca65166726923c249b5f615 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 21 Mar 2024 10:06:35 -0600 Subject: [PATCH 02/39] gh-105716: Update interp->threads.main After Fork (gh-117049) I missed this in gh-109921. We also update Py_Exit() to call _PyInterpreterState_SetNotRunningMain(), if necessary. --- Include/internal/pycore_pystate.h | 3 +++ Modules/posixmodule.c | 4 ++++ Python/pylifecycle.c | 4 ++++ Python/pystate.c | 35 +++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 6f9e6a332a7830..9aa439229cc8ea 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -83,6 +83,9 @@ PyAPI_FUNC(void) _PyInterpreterState_SetNotRunningMain(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IsRunningMain(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_FailIfRunningMain(PyInterpreterState *); +extern int _PyThreadState_IsRunningMain(PyThreadState *); +extern void _PyInterpreterState_ReinitRunningMain(PyThreadState *); + static inline const PyConfig * _Py_GetMainConfig(void) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 644d4ba1f65b38..88679164fc3aab 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -646,6 +646,7 @@ PyOS_AfterFork_Child(void) PyThreadState *tstate = _PyThreadState_GET(); _Py_EnsureTstateNotNULL(tstate); + assert(tstate->thread_id == PyThread_get_thread_ident()); #ifdef 
PY_HAVE_THREAD_NATIVE_ID tstate->native_thread_id = PyThread_get_thread_native_id(); #endif @@ -655,6 +656,9 @@ PyOS_AfterFork_Child(void) _Py_qsbr_after_fork((_PyThreadStateImpl *)tstate); #endif + // Ideally we could guarantee tstate is running main. + _PyInterpreterState_ReinitRunningMain(tstate); + status = _PyEval_ReInitThreads(tstate); if (_PyStatus_EXCEPTION(status)) { goto fatal_error; diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index bc76822e72c54a..4bd01ad23e573f 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -3138,6 +3138,10 @@ call_ll_exitfuncs(_PyRuntimeState *runtime) void _Py_NO_RETURN Py_Exit(int sts) { + PyThreadState *tstate = _PyThreadState_GET(); + if (tstate != NULL && _PyThreadState_IsRunningMain(tstate)) { + _PyInterpreterState_SetNotRunningMain(tstate->interp); + } if (Py_FinalizeEx() < 0) { sts = 120; } diff --git a/Python/pystate.c b/Python/pystate.c index 5332b8a827d7e8..6d63eac22a470c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1050,6 +1050,30 @@ _PyInterpreterState_IsRunningMain(PyInterpreterState *interp) return 0; } +#ifndef NDEBUG +static int +is_running_main(PyThreadState *tstate) +{ + if (tstate->interp->threads.main != NULL) { + return tstate == tstate->interp->threads.main; + } + return 0; +} +#endif + +int +_PyThreadState_IsRunningMain(PyThreadState *tstate) +{ + PyInterpreterState *interp = tstate->interp; + if (interp->threads.main != NULL) { + return tstate == interp->threads.main; + } + if (_Py_IsMainInterpreter(interp)) { + return tstate->thread_id == interp->runtime->main_thread; + } + return 0; +} + int _PyInterpreterState_FailIfRunningMain(PyInterpreterState *interp) { @@ -1061,6 +1085,15 @@ _PyInterpreterState_FailIfRunningMain(PyInterpreterState *interp) return 0; } +void +_PyInterpreterState_ReinitRunningMain(PyThreadState *tstate) +{ + PyInterpreterState *interp = tstate->interp; + if (interp->threads.main != tstate) { + interp->threads.main = NULL; + } +} + //---------- // 
accessors @@ -1543,6 +1576,7 @@ PyThreadState_Clear(PyThreadState *tstate) { assert(tstate->_status.initialized && !tstate->_status.cleared); assert(current_fast_get()->interp == tstate->interp); + assert(!is_running_main(tstate)); // XXX assert(!tstate->_status.bound || tstate->_status.unbound); tstate->_status.finalizing = 1; // just in case @@ -1641,6 +1675,7 @@ tstate_delete_common(PyThreadState *tstate) assert(tstate->_status.cleared && !tstate->_status.finalized); assert(tstate->state != _Py_THREAD_ATTACHED); tstate_verify_not_active(tstate); + assert(!is_running_main(tstate)); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { From 8bea6c411d65cd987616b4ecdb86373e4f21f1c6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 21 Mar 2024 17:07:00 +0100 Subject: [PATCH 03/39] gh-115754: Add Py_GetConstant() function (#116883) Add Py_GetConstant() and Py_GetConstantBorrowed() functions. In the limited C API version 3.13, getting Py_None, Py_False, Py_True, Py_Ellipsis and Py_NotImplemented singletons is now implemented as function calls at the stable ABI level to hide implementation details. Getting these constants still returns borrowed references. Add _testlimitedcapi/object.c and test_capi/test_object.py to test Py_GetConstant() and Py_GetConstantBorrowed() functions. 
--- Doc/c-api/object.rst | 49 ++++++++++++ Doc/data/stable_abi.dat | 2 + Doc/whatsnew/3.13.rst | 5 ++ Include/boolobject.h | 9 ++- Include/internal/pycore_object.h | 2 + Include/object.h | 31 ++++++- Include/sliceobject.h | 6 +- Lib/test/test_capi/test_object.py | 54 +++++++++++++ Lib/test/test_stable_abi_ctypes.py | 2 + ...-03-15-23-55-24.gh-issue-115754.xnzc__.rst | 3 + ...-03-15-23-57-33.gh-issue-115754.zLdv82.rst | 5 ++ Misc/stable_abi.toml | 4 + Modules/Setup.stdlib.in | 2 +- Modules/_testlimitedcapi.c | 3 + Modules/_testlimitedcapi/object.c | 80 +++++++++++++++++++ Modules/_testlimitedcapi/parts.h | 1 + Objects/object.c | 51 ++++++++++++ PC/python3dll.c | 2 + PCbuild/_testlimitedcapi.vcxproj | 1 + PCbuild/_testlimitedcapi.vcxproj.filters | 1 + Python/pylifecycle.c | 4 + Tools/c-analyzer/cpython/ignored.tsv | 1 + 22 files changed, 312 insertions(+), 6 deletions(-) create mode 100644 Lib/test/test_capi/test_object.py create mode 100644 Misc/NEWS.d/next/C API/2024-03-15-23-55-24.gh-issue-115754.xnzc__.rst create mode 100644 Misc/NEWS.d/next/C API/2024-03-15-23-57-33.gh-issue-115754.zLdv82.rst create mode 100644 Modules/_testlimitedcapi/object.c diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 12476412799a4f..ba454db9117504 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -6,6 +6,55 @@ Object Protocol =============== +.. c:function:: PyObject* Py_GetConstant(unsigned int constant_id) + + Get a :term:`strong reference` to a constant. + + Set an exception and return ``NULL`` if *constant_id* is invalid. + + *constant_id* must be one of these constant identifiers: + + .. c:namespace:: NULL + + ======================================== ===== ========================= + Constant Identifier Value Returned object + ======================================== ===== ========================= + .. c:macro:: Py_CONSTANT_NONE ``0`` :py:data:`None` + .. c:macro:: Py_CONSTANT_FALSE ``1`` :py:data:`False` + .. 
c:macro:: Py_CONSTANT_TRUE ``2`` :py:data:`True` + .. c:macro:: Py_CONSTANT_ELLIPSIS ``3`` :py:data:`Ellipsis` + .. c:macro:: Py_CONSTANT_NOT_IMPLEMENTED ``4`` :py:data:`NotImplemented` + .. c:macro:: Py_CONSTANT_ZERO ``5`` ``0`` + .. c:macro:: Py_CONSTANT_ONE ``6`` ``1`` + .. c:macro:: Py_CONSTANT_EMPTY_STR ``7`` ``''`` + .. c:macro:: Py_CONSTANT_EMPTY_BYTES ``8`` ``b''`` + .. c:macro:: Py_CONSTANT_EMPTY_TUPLE ``9`` ``()`` + ======================================== ===== ========================= + + Numeric values are only given for projects which cannot use the constant + identifiers. + + + .. versionadded:: 3.13 + + .. impl-detail:: + + In CPython, all of these constants are :term:`immortal`. + + +.. c:function:: PyObject* Py_GetConstantBorrowed(unsigned int constant_id) + + Similar to :c:func:`Py_GetConstant`, but return a :term:`borrowed + reference`. + + This function is primarily intended for backwards compatibility: + using :c:func:`Py_GetConstant` is recommended for new code. + + The reference is borrowed from the interpreter, and is valid until the + interpreter finalization. + .. versionadded:: 3.13 + + .. c:var:: PyObject* Py_NotImplemented The ``NotImplemented`` singleton, used to signal that an operation is diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 9d0ad3d036dac3..565f134290802a 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -838,6 +838,8 @@ function,Py_GenericAlias,3.9,, var,Py_GenericAliasType,3.9,, function,Py_GetBuildInfo,3.2,, function,Py_GetCompiler,3.2,, +function,Py_GetConstant,3.13,, +function,Py_GetConstantBorrowed,3.13,, function,Py_GetCopyright,3.2,, function,Py_GetExecPrefix,3.2,, function,Py_GetPath,3.2,, diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 40e2e6a8e03be9..caadf4a3d68fb2 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1731,6 +1731,11 @@ New Features more information. (Contributed by Victor Stinner in :gh:`111696`.) 
+* Add :c:func:`Py_GetConstant` and :c:func:`Py_GetConstantBorrowed` functions + to get constants. For example, ``Py_GetConstant(Py_CONSTANT_ZERO)`` returns a + :term:`strong reference` to the constant zero. + (Contributed by Victor Stinner in :gh:`115754`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/boolobject.h b/Include/boolobject.h index 19aef5b1b87c6a..3037e61bbf6d0c 100644 --- a/Include/boolobject.h +++ b/Include/boolobject.h @@ -18,8 +18,13 @@ PyAPI_DATA(PyLongObject) _Py_FalseStruct; PyAPI_DATA(PyLongObject) _Py_TrueStruct; /* Use these macros */ -#define Py_False _PyObject_CAST(&_Py_FalseStruct) -#define Py_True _PyObject_CAST(&_Py_TrueStruct) +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_False Py_GetConstantBorrowed(Py_CONSTANT_FALSE) +# define Py_True Py_GetConstantBorrowed(Py_CONSTANT_TRUE) +#else +# define Py_False _PyObject_CAST(&_Py_FalseStruct) +# define Py_True _PyObject_CAST(&_Py_TrueStruct) +#endif // Test if an object is the True singleton, the same as "x is True" in Python. PyAPI_FUNC(int) Py_IsTrue(PyObject *x); diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 759ec4d17b5eb4..13fe543133f11e 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -716,6 +716,8 @@ PyAPI_DATA(PyTypeObject) _PyNotImplemented_Type; // Export for the stable ABI. 
PyAPI_DATA(int) _Py_SwappedOp[]; +extern void _Py_GetConstant_Init(void); + #ifdef __cplusplus } #endif diff --git a/Include/object.h b/Include/object.h index b0c0dba06ca139..67a5e514c421c3 100644 --- a/Include/object.h +++ b/Include/object.h @@ -1068,12 +1068,34 @@ static inline PyObject* _Py_XNewRef(PyObject *obj) #endif +#define Py_CONSTANT_NONE 0 +#define Py_CONSTANT_FALSE 1 +#define Py_CONSTANT_TRUE 2 +#define Py_CONSTANT_ELLIPSIS 3 +#define Py_CONSTANT_NOT_IMPLEMENTED 4 +#define Py_CONSTANT_ZERO 5 +#define Py_CONSTANT_ONE 6 +#define Py_CONSTANT_EMPTY_STR 7 +#define Py_CONSTANT_EMPTY_BYTES 8 +#define Py_CONSTANT_EMPTY_TUPLE 9 + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(PyObject*) Py_GetConstant(unsigned int constant_id); +PyAPI_FUNC(PyObject*) Py_GetConstantBorrowed(unsigned int constant_id); +#endif + + /* _Py_NoneStruct is an object of undefined type which can be used in contexts where NULL (nil) is not suitable (since NULL often means 'error'). */ PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ -#define Py_None (&_Py_NoneStruct) + +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_None Py_GetConstantBorrowed(Py_CONSTANT_NONE) +#else +# define Py_None (&_Py_NoneStruct) +#endif // Test if an object is the None singleton, the same as "x is None" in Python. PyAPI_FUNC(int) Py_IsNone(PyObject *x); @@ -1087,7 +1109,12 @@ Py_NotImplemented is a singleton used to signal that an operation is not implemented for a given type combination. 
*/ PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ -#define Py_NotImplemented (&_Py_NotImplementedStruct) + +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_NotImplemented Py_GetConstantBorrowed(Py_CONSTANT_NOT_IMPLEMENTED) +#else +# define Py_NotImplemented (&_Py_NotImplementedStruct) +#endif /* Macro for returning Py_NotImplemented from a function */ #define Py_RETURN_NOTIMPLEMENTED return Py_NotImplemented diff --git a/Include/sliceobject.h b/Include/sliceobject.h index c13863f27c2e63..35e2ea254ca80a 100644 --- a/Include/sliceobject.h +++ b/Include/sliceobject.h @@ -8,7 +8,11 @@ extern "C" { PyAPI_DATA(PyObject) _Py_EllipsisObject; /* Don't use this directly */ -#define Py_Ellipsis (&_Py_EllipsisObject) +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_Ellipsis Py_GetConstantBorrowed(Py_CONSTANT_ELLIPSIS) +#else +# define Py_Ellipsis (&_Py_EllipsisObject) +#endif /* Slice object interface */ diff --git a/Lib/test/test_capi/test_object.py b/Lib/test/test_capi/test_object.py new file mode 100644 index 00000000000000..c80e9b653789ad --- /dev/null +++ b/Lib/test/test_capi/test_object.py @@ -0,0 +1,54 @@ +import enum +import unittest +from test.support import import_helper + +_testlimitedcapi = import_helper.import_module('_testlimitedcapi') + + +class Constant(enum.IntEnum): + Py_CONSTANT_NONE = 0 + Py_CONSTANT_FALSE = 1 + Py_CONSTANT_TRUE = 2 + Py_CONSTANT_ELLIPSIS = 3 + Py_CONSTANT_NOT_IMPLEMENTED = 4 + Py_CONSTANT_ZERO = 5 + Py_CONSTANT_ONE = 6 + Py_CONSTANT_EMPTY_STR = 7 + Py_CONSTANT_EMPTY_BYTES = 8 + Py_CONSTANT_EMPTY_TUPLE = 9 + + INVALID_CONSTANT = Py_CONSTANT_EMPTY_TUPLE + 1 + + +class CAPITest(unittest.TestCase): + def check_get_constant(self, get_constant): + self.assertIs(get_constant(Constant.Py_CONSTANT_NONE), None) + self.assertIs(get_constant(Constant.Py_CONSTANT_FALSE), False) + self.assertIs(get_constant(Constant.Py_CONSTANT_TRUE), True) + 
self.assertIs(get_constant(Constant.Py_CONSTANT_ELLIPSIS), Ellipsis) + self.assertIs(get_constant(Constant.Py_CONSTANT_NOT_IMPLEMENTED), NotImplemented) + + for constant_id, constant_type, value in ( + (Constant.Py_CONSTANT_ZERO, int, 0), + (Constant.Py_CONSTANT_ONE, int, 1), + (Constant.Py_CONSTANT_EMPTY_STR, str, ""), + (Constant.Py_CONSTANT_EMPTY_BYTES, bytes, b""), + (Constant.Py_CONSTANT_EMPTY_TUPLE, tuple, ()), + ): + with self.subTest(constant_id=constant_id): + obj = get_constant(constant_id) + self.assertEqual(type(obj), constant_type, obj) + self.assertEqual(obj, value) + + with self.assertRaises(SystemError): + get_constant(Constant.INVALID_CONSTANT) + + def test_get_constant(self): + self.check_get_constant(_testlimitedcapi.get_constant) + + def test_get_constant_borrowed(self): + self.check_get_constant(_testlimitedcapi.get_constant_borrowed) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 117c27d27b38dc..0e304853d399aa 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -856,6 +856,8 @@ def test_windows_feature_macros(self): "Py_GetArgcArgv", "Py_GetBuildInfo", "Py_GetCompiler", + "Py_GetConstant", + "Py_GetConstantBorrowed", "Py_GetCopyright", "Py_GetExecPrefix", "Py_GetPath", diff --git a/Misc/NEWS.d/next/C API/2024-03-15-23-55-24.gh-issue-115754.xnzc__.rst b/Misc/NEWS.d/next/C API/2024-03-15-23-55-24.gh-issue-115754.xnzc__.rst new file mode 100644 index 00000000000000..d76c98ee54056d --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-03-15-23-55-24.gh-issue-115754.xnzc__.rst @@ -0,0 +1,3 @@ +Add :c:func:`Py_GetConstant` and :c:func:`Py_GetConstantBorrowed` functions to +get constants. For example, ``Py_GetConstant(Py_CONSTANT_ZERO)`` returns a +:term:`strong reference` to the constant zero. Patch by Victor Stinner. 
diff --git a/Misc/NEWS.d/next/C API/2024-03-15-23-57-33.gh-issue-115754.zLdv82.rst b/Misc/NEWS.d/next/C API/2024-03-15-23-57-33.gh-issue-115754.zLdv82.rst new file mode 100644 index 00000000000000..feff0c0897eae1 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-03-15-23-57-33.gh-issue-115754.zLdv82.rst @@ -0,0 +1,5 @@ +In the limited C API version 3.13, getting ``Py_None``, ``Py_False``, +``Py_True``, ``Py_Ellipsis`` and ``Py_NotImplemented`` singletons is now +implemented as function calls at the stable ABI level to hide implementation +details. Getting these constants still return borrowed references. Patch by +Victor Stinner. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index c68adf8db079f9..6d6e484b0b6c67 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2500,3 +2500,7 @@ added = '3.13' [function.PyType_GetModuleName] added = '3.13' +[function.Py_GetConstant] + added = '3.13' +[function.Py_GetConstantBorrowed] + added = '3.13' diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 1b7ad0e5d95a13..09d6f3b2bb7e8d 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -163,7 +163,7 @@ @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c @MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c 
-@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c +@MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c diff --git a/Modules/_testlimitedcapi.c b/Modules/_testlimitedcapi.c index bfd41070eedd55..598071fe0ddbad 100644 --- a/Modules/_testlimitedcapi.c +++ b/Modules/_testlimitedcapi.c @@ -53,6 +53,9 @@ PyInit__testlimitedcapi(void) if (_PyTestLimitedCAPI_Init_Long(mod) < 0) { return NULL; } + if (_PyTestLimitedCAPI_Init_Object(mod) < 0) { + return NULL; + } if (_PyTestLimitedCAPI_Init_PyOS(mod) < 0) { return NULL; } diff --git a/Modules/_testlimitedcapi/object.c b/Modules/_testlimitedcapi/object.c new file mode 100644 index 00000000000000..6e438c811d6e98 --- /dev/null +++ b/Modules/_testlimitedcapi/object.c @@ -0,0 +1,80 @@ +// Need limited C API version 3.13 for Py_GetConstant() +#include "pyconfig.h" // Py_GIL_DISABLED +#if !defined(Py_GIL_DISABLED) && !defined(Py_LIMITED_API ) +# define Py_LIMITED_API 0x030d0000 +#endif + +#include "parts.h" +#include "util.h" + + +/* Test Py_GetConstant() */ +static PyObject * +get_constant(PyObject 
*Py_UNUSED(module), PyObject *args) +{ + int constant_id; + if (!PyArg_ParseTuple(args, "i", &constant_id)) { + return NULL; + } + + PyObject *obj = Py_GetConstant(constant_id); + if (obj == NULL) { + assert(PyErr_Occurred()); + return NULL; + } + return obj; +} + + +/* Test Py_GetConstantBorrowed() */ +static PyObject * +get_constant_borrowed(PyObject *Py_UNUSED(module), PyObject *args) +{ + int constant_id; + if (!PyArg_ParseTuple(args, "i", &constant_id)) { + return NULL; + } + + PyObject *obj = Py_GetConstantBorrowed(constant_id); + if (obj == NULL) { + assert(PyErr_Occurred()); + return NULL; + } + return Py_NewRef(obj); +} + + +/* Test constants */ +static PyObject * +test_constants(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) +{ + // Test that implementation of constants in the limited C API: + // check that the C code compiles. + // + // Test also that constants and Py_GetConstant() return the same + // objects. + assert(Py_None == Py_GetConstant(Py_CONSTANT_NONE)); + assert(Py_False == Py_GetConstant(Py_CONSTANT_FALSE)); + assert(Py_True == Py_GetConstant(Py_CONSTANT_TRUE)); + assert(Py_Ellipsis == Py_GetConstant(Py_CONSTANT_ELLIPSIS)); + assert(Py_NotImplemented == Py_GetConstant(Py_CONSTANT_NOT_IMPLEMENTED)); + // Other constants are tested in test_capi.test_object + Py_RETURN_NONE; +} + +static PyMethodDef test_methods[] = { + {"get_constant", get_constant, METH_VARARGS}, + {"get_constant_borrowed", get_constant_borrowed, METH_VARARGS}, + {"test_constants", test_constants, METH_NOARGS}, + {NULL}, +}; + +int +_PyTestLimitedCAPI_Init_Object(PyObject *m) +{ + if (PyModule_AddFunctions(m, test_methods) < 0) { + return -1; + } + + return 0; +} diff --git a/Modules/_testlimitedcapi/parts.h b/Modules/_testlimitedcapi/parts.h index 4b65912489661b..d91f174cd31eed 100644 --- a/Modules/_testlimitedcapi/parts.h +++ b/Modules/_testlimitedcapi/parts.h @@ -29,6 +29,7 @@ int _PyTestLimitedCAPI_Init_Complex(PyObject *module); int 
_PyTestLimitedCAPI_Init_Dict(PyObject *module); int _PyTestLimitedCAPI_Init_Float(PyObject *module); int _PyTestLimitedCAPI_Init_HeaptypeRelative(PyObject *module); +int _PyTestLimitedCAPI_Init_Object(PyObject *module); int _PyTestLimitedCAPI_Init_List(PyObject *module); int _PyTestLimitedCAPI_Init_Long(PyObject *module); int _PyTestLimitedCAPI_Init_PyOS(PyObject *module); diff --git a/Objects/object.c b/Objects/object.c index fcb8cf481657e5..0d03292c9115cd 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -14,6 +14,7 @@ #include "pycore_memoryobject.h" // _PyManagedBuffer_Type #include "pycore_namespace.h" // _PyNamespace_Type #include "pycore_object.h" // PyAPI_DATA() _Py_SwappedOp definition +#include "pycore_long.h" // _PyLong_GetZero() #include "pycore_optimizer.h" // _PyUOpExecutor_Type, _PyUOpOptimizer_Type, ... #include "pycore_pyerrors.h" // _PyErr_Occurred() #include "pycore_pymem.h" // _PyMem_IsPtrFreed() @@ -2991,3 +2992,53 @@ _Py_SetRefcnt(PyObject *ob, Py_ssize_t refcnt) { Py_SET_REFCNT(ob, refcnt); } + + +static PyObject* constants[] = { + &_Py_NoneStruct, // Py_CONSTANT_NONE + (PyObject*)(&_Py_FalseStruct), // Py_CONSTANT_FALSE + (PyObject*)(&_Py_TrueStruct), // Py_CONSTANT_TRUE + &_Py_EllipsisObject, // Py_CONSTANT_ELLIPSIS + &_Py_NotImplementedStruct, // Py_CONSTANT_NOT_IMPLEMENTED + NULL, // Py_CONSTANT_ZERO + NULL, // Py_CONSTANT_ONE + NULL, // Py_CONSTANT_EMPTY_STR + NULL, // Py_CONSTANT_EMPTY_BYTES + NULL, // Py_CONSTANT_EMPTY_TUPLE +}; + +void +_Py_GetConstant_Init(void) +{ + constants[Py_CONSTANT_ZERO] = _PyLong_GetZero(); + constants[Py_CONSTANT_ONE] = _PyLong_GetOne(); + constants[Py_CONSTANT_EMPTY_STR] = PyUnicode_New(0, 0); + constants[Py_CONSTANT_EMPTY_BYTES] = PyBytes_FromStringAndSize(NULL, 0); + constants[Py_CONSTANT_EMPTY_TUPLE] = PyTuple_New(0); +#ifndef NDEBUG + for (size_t i=0; i < Py_ARRAY_LENGTH(constants); i++) { + assert(constants[i] != NULL); + assert(_Py_IsImmortal(constants[i])); + } +#endif +} + +PyObject* 
+Py_GetConstant(unsigned int constant_id) +{ + if (constant_id < Py_ARRAY_LENGTH(constants)) { + return constants[constant_id]; + } + else { + PyErr_BadInternalCall(); + return NULL; + } +} + + +PyObject* +Py_GetConstantBorrowed(unsigned int constant_id) +{ + // All constants are immortal + return Py_GetConstant(constant_id); +} diff --git a/PC/python3dll.c b/PC/python3dll.c index dbfa3f23bb586d..147bfad44c3741 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -55,6 +55,8 @@ EXPORT_FUNC(Py_GenericAlias) EXPORT_FUNC(Py_GetArgcArgv) EXPORT_FUNC(Py_GetBuildInfo) EXPORT_FUNC(Py_GetCompiler) +EXPORT_FUNC(Py_GetConstant) +EXPORT_FUNC(Py_GetConstantBorrowed) EXPORT_FUNC(Py_GetCopyright) EXPORT_FUNC(Py_GetExecPrefix) EXPORT_FUNC(Py_GetPath) diff --git a/PCbuild/_testlimitedcapi.vcxproj b/PCbuild/_testlimitedcapi.vcxproj index 58a8bcbdbce4e8..252039d93103bd 100644 --- a/PCbuild/_testlimitedcapi.vcxproj +++ b/PCbuild/_testlimitedcapi.vcxproj @@ -103,6 +103,7 @@ + diff --git a/PCbuild/_testlimitedcapi.vcxproj.filters b/PCbuild/_testlimitedcapi.vcxproj.filters index e203edaf123e8d..7efbb0acf8f960 100644 --- a/PCbuild/_testlimitedcapi.vcxproj.filters +++ b/PCbuild/_testlimitedcapi.vcxproj.filters @@ -18,6 +18,7 @@ + diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 4bd01ad23e573f..683534d342f437 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -687,6 +687,10 @@ pycore_init_global_objects(PyInterpreterState *interp) _PyUnicode_InitState(interp); + if (_Py_IsMainInterpreter(interp)) { + _Py_GetConstant_Init(); + } + return _PyStatus_OK(); } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 0024e2683052c8..965346b9b04a32 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -742,3 +742,4 @@ Modules/_sqlite/module.c - _sqlite3module - Modules/clinic/md5module.c.h _md5_md5 _keywords - Modules/clinic/grpmodule.c.h grp_getgrgid _keywords - Modules/clinic/grpmodule.c.h 
grp_getgrnam _keywords - +Objects/object.c - constants static PyObject*[] From abdd1f938f08e536864532b2071f144515ecc88b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 21 Mar 2024 17:45:43 +0100 Subject: [PATCH 04/39] gh-85283: Build _testconsole extension with limited C API (#117125) --- Doc/whatsnew/3.13.rst | 2 +- ...4-03-14-10-33-58.gh-issue-85283.LOgmdU.rst | 5 +- PC/_testconsole.c | 35 ++++--- PC/clinic/_testconsole.c.h | 99 ++++--------------- 4 files changed, 43 insertions(+), 98 deletions(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index caadf4a3d68fb2..7e6c79dbf50aac 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1527,7 +1527,7 @@ Build Changes * The ``errno``, ``fcntl``, ``grp``, ``md5``, ``pwd``, ``resource``, ``termios``, ``winsound``, ``_ctypes_test``, ``_multiprocessing.posixshmem``, ``_scproxy``, ``_stat``, - ``_statistics``, ``_testimportmultiple`` and ``_uuid`` + ``_statistics``, ``_testconsole``, ``_testimportmultiple`` and ``_uuid`` C extensions are now built with the :ref:`limited C API `. (Contributed by Victor Stinner in :gh:`85283`.) diff --git a/Misc/NEWS.d/next/C API/2024-03-14-10-33-58.gh-issue-85283.LOgmdU.rst b/Misc/NEWS.d/next/C API/2024-03-14-10-33-58.gh-issue-85283.LOgmdU.rst index ef8a934b435a88..c8e6b1b1e6ed62 100644 --- a/Misc/NEWS.d/next/C API/2024-03-14-10-33-58.gh-issue-85283.LOgmdU.rst +++ b/Misc/NEWS.d/next/C API/2024-03-14-10-33-58.gh-issue-85283.LOgmdU.rst @@ -1,2 +1,3 @@ -The ``fcntl``, ``grp``, ``pwd``, ``termios`` and ``_statistics`` C extensions are now -built with the :ref:`limited C API `. Patch by Victor Stinner. +The ``fcntl``, ``grp``, ``pwd``, ``termios``, ``_statistics`` and +``_testconsole`` C extensions are now built with the :ref:`limited C API +`. Patch by Victor Stinner. 
diff --git a/PC/_testconsole.c b/PC/_testconsole.c index 1dc0d230c4d7c3..f1ace003df483b 100644 --- a/PC/_testconsole.c +++ b/PC/_testconsole.c @@ -1,17 +1,16 @@ /* Testing module for multi-phase initialization of extension modules (PEP 489) */ -#ifndef Py_BUILD_CORE_BUILTIN -# define Py_BUILD_CORE_MODULE 1 +// Need limited C API version 3.12 for Py_MOD_PER_INTERPRETER_GIL_SUPPORTED +#include "pyconfig.h" // Py_GIL_DISABLED +#ifndef Py_GIL_DISABLED +# define Py_LIMITED_API 0x030c0000 #endif #include "Python.h" #ifdef MS_WINDOWS -#include "pycore_fileutils.h" // _Py_get_osfhandle() -#include "pycore_runtime.h" // _Py_ID() - #define WIN32_LEAN_AND_MEAN #include #include @@ -57,20 +56,24 @@ module _testconsole _testconsole.write_input file: object - s: PyBytesObject + s: Py_buffer Writes UTF-16-LE encoded bytes to the console as if typed by a user. [clinic start generated code]*/ static PyObject * -_testconsole_write_input_impl(PyObject *module, PyObject *file, - PyBytesObject *s) -/*[clinic end generated code: output=48f9563db34aedb3 input=4c774f2d05770bc6]*/ +_testconsole_write_input_impl(PyObject *module, PyObject *file, Py_buffer *s) +/*[clinic end generated code: output=58631a8985426ad3 input=68062f1bb2e52206]*/ { INPUT_RECORD *rec = NULL; - PyTypeObject *winconsoleio_type = (PyTypeObject *)_PyImport_GetModuleAttr( - &_Py_ID(_io), &_Py_ID(_WindowsConsoleIO)); + PyObject *mod = PyImport_ImportModule("_io"); + if (mod == NULL) { + return NULL; + } + + PyTypeObject *winconsoleio_type = (PyTypeObject *)PyObject_GetAttrString(mod, "_WindowsConsoleIO"); + Py_DECREF(mod); if (winconsoleio_type == NULL) { return NULL; } @@ -81,8 +84,8 @@ _testconsole_write_input_impl(PyObject *module, PyObject *file, return NULL; } - const wchar_t *p = (const wchar_t *)PyBytes_AS_STRING(s); - DWORD size = (DWORD)PyBytes_GET_SIZE(s) / sizeof(wchar_t); + const wchar_t *p = (const wchar_t *)s->buf; + DWORD size = (DWORD)s->len / sizeof(wchar_t); rec = (INPUT_RECORD*)PyMem_Calloc(size, 
sizeof(INPUT_RECORD)); if (!rec) @@ -96,9 +99,11 @@ _testconsole_write_input_impl(PyObject *module, PyObject *file, prec->Event.KeyEvent.uChar.UnicodeChar = *p; } - HANDLE hInput = _Py_get_osfhandle(((winconsoleio*)file)->fd); - if (hInput == INVALID_HANDLE_VALUE) + HANDLE hInput = (HANDLE)_get_osfhandle(((winconsoleio*)file)->fd); + if (hInput == INVALID_HANDLE_VALUE) { + PyErr_SetFromErrno(PyExc_OSError); goto error; + } DWORD total = 0; while (total < size) { diff --git a/PC/clinic/_testconsole.c.h b/PC/clinic/_testconsole.c.h index 2c71c11c438b5b..4c11e545499ac5 100644 --- a/PC/clinic/_testconsole.c.h +++ b/PC/clinic/_testconsole.c.h @@ -2,12 +2,6 @@ preserve [clinic start generated code]*/ -#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) -# include "pycore_gc.h" // PyGC_Head -# include "pycore_runtime.h" // _Py_ID() -#endif -#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() - #if defined(MS_WINDOWS) PyDoc_STRVAR(_testconsole_write_input__doc__, @@ -17,58 +11,30 @@ PyDoc_STRVAR(_testconsole_write_input__doc__, "Writes UTF-16-LE encoded bytes to the console as if typed by a user."); #define _TESTCONSOLE_WRITE_INPUT_METHODDEF \ - {"write_input", _PyCFunction_CAST(_testconsole_write_input), METH_FASTCALL|METH_KEYWORDS, _testconsole_write_input__doc__}, + {"write_input", (PyCFunction)(void(*)(void))_testconsole_write_input, METH_VARARGS|METH_KEYWORDS, _testconsole_write_input__doc__}, static PyObject * -_testconsole_write_input_impl(PyObject *module, PyObject *file, - PyBytesObject *s); +_testconsole_write_input_impl(PyObject *module, PyObject *file, Py_buffer *s); static PyObject * -_testconsole_write_input(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_testconsole_write_input(PyObject *module, PyObject *args, PyObject *kwargs) { PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 2 - static struct { - PyGC_Head _this_is_not_used; - 
PyObject_VAR_HEAD - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(file), &_Py_ID(s), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // !Py_BUILD_CORE - - static const char * const _keywords[] = {"file", "s", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "write_input", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[2]; + static char *_keywords[] = {"file", "s", NULL}; PyObject *file; - PyBytesObject *s; + Py_buffer s = {NULL, NULL}; - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); - if (!args) { - goto exit; - } - file = args[0]; - if (!PyBytes_Check(args[1])) { - _PyArg_BadArgument("write_input", "argument 's'", "bytes", args[1]); + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oy*:write_input", _keywords, + &file, &s)) goto exit; - } - s = (PyBytesObject *)args[1]; - return_value = _testconsole_write_input_impl(module, file, s); + return_value = _testconsole_write_input_impl(module, file, &s); exit: + /* Cleanup for s */ + if (s.obj) { + PyBuffer_Release(&s); + } + return return_value; } @@ -83,48 +49,21 @@ PyDoc_STRVAR(_testconsole_read_output__doc__, "Reads a str from the console as written to stdout."); #define _TESTCONSOLE_READ_OUTPUT_METHODDEF \ - {"read_output", _PyCFunction_CAST(_testconsole_read_output), METH_FASTCALL|METH_KEYWORDS, _testconsole_read_output__doc__}, + {"read_output", (PyCFunction)(void(*)(void))_testconsole_read_output, METH_VARARGS|METH_KEYWORDS, _testconsole_read_output__doc__}, static PyObject * _testconsole_read_output_impl(PyObject *module, PyObject *file); static PyObject * -_testconsole_read_output(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_testconsole_read_output(PyObject *module, PyObject *args, PyObject *kwargs) { PyObject 
*return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 1 - static struct { - PyGC_Head _this_is_not_used; - PyObject_VAR_HEAD - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(file), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // !Py_BUILD_CORE - - static const char * const _keywords[] = {"file", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "read_output", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; + static char *_keywords[] = {"file", NULL}; PyObject *file; - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); - if (!args) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O:read_output", _keywords, + &file)) goto exit; - } - file = args[0]; return_value = _testconsole_read_output_impl(module, file); exit: @@ -140,4 +79,4 @@ _testconsole_read_output(PyObject *module, PyObject *const *args, Py_ssize_t nar #ifndef _TESTCONSOLE_READ_OUTPUT_METHODDEF #define _TESTCONSOLE_READ_OUTPUT_METHODDEF #endif /* !defined(_TESTCONSOLE_READ_OUTPUT_METHODDEF) */ -/*[clinic end generated code: output=08a1c844b3657272 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d60ce07157e3741a input=a9049054013a1b77]*/ From 617158e07811edfd6fd552a3d84b0beedd8f1d18 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 21 Mar 2024 11:15:02 -0600 Subject: [PATCH 05/39] gh-76785: Drop PyInterpreterID_Type (gh-117101) I added it quite a while ago as a strategy for managing interpreter lifetimes relative to the PEP 554 (now 734) implementation. Relatively recently I refactored that implementation to no longer rely on InterpreterID objects. Thus now I'm removing it. 
--- Include/cpython/interpreteridobject.h | 14 - Include/internal/pycore_interp.h | 5 +- Include/interpreteridobject.h | 17 -- Lib/test/test_capi/test_misc.py | 308 ++++++++++++++------ Makefile.pre.in | 3 - Modules/_interpreters_common.h | 17 ++ Modules/_testcapimodule.c | 8 - Modules/_testinternalcapi.c | 119 ++++++++ Modules/_xxinterpchannelsmodule.c | 5 +- Modules/_xxsubinterpretersmodule.c | 17 +- Objects/interpreteridobject.c | 274 ----------------- Objects/object.c | 3 - PCbuild/_freeze_module.vcxproj | 1 - PCbuild/_freeze_module.vcxproj.filters | 3 - PCbuild/pythoncore.vcxproj | 3 - PCbuild/pythoncore.vcxproj.filters | 9 - Python/pystate.c | 5 - Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 - 18 files changed, 363 insertions(+), 449 deletions(-) delete mode 100644 Include/cpython/interpreteridobject.h delete mode 100644 Include/interpreteridobject.h delete mode 100644 Objects/interpreteridobject.c diff --git a/Include/cpython/interpreteridobject.h b/Include/cpython/interpreteridobject.h deleted file mode 100644 index d425c909806e44..00000000000000 --- a/Include/cpython/interpreteridobject.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef Py_CPYTHON_INTERPRETERIDOBJECT_H -# error "this header file must not be included directly" -#endif - -/* Interpreter ID Object */ - -PyAPI_DATA(PyTypeObject) PyInterpreterID_Type; - -PyAPI_FUNC(PyObject *) PyInterpreterID_New(int64_t); -PyAPI_FUNC(PyObject *) PyInterpreterState_GetIDObject(PyInterpreterState *); - -#ifdef Py_BUILD_CORE -extern int64_t _PyInterpreterID_GetID(PyObject *); -#endif diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index b28e8a3ff45f3f..b8d0fdcce11ba8 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -295,12 +295,11 @@ _PyInterpreterState_SetFinalizing(PyInterpreterState *interp, PyThreadState *tst } -extern int64_t _PyInterpreterState_ObjectToID(PyObject *); -// Export for the _xxinterpchannels module. 
+// Exports for the _testinternalcapi module. +PyAPI_FUNC(int64_t) _PyInterpreterState_ObjectToID(PyObject *); PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpID(int64_t); PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpIDObject(PyObject *); - PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); diff --git a/Include/interpreteridobject.h b/Include/interpreteridobject.h deleted file mode 100644 index 8432632f339e92..00000000000000 --- a/Include/interpreteridobject.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef Py_INTERPRETERIDOBJECT_H -#define Py_INTERPRETERIDOBJECT_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef Py_LIMITED_API -# define Py_CPYTHON_INTERPRETERIDOBJECT_H -# include "cpython/interpreteridobject.h" -# undef Py_CPYTHON_INTERPRETERIDOBJECT_H -#endif - -#ifdef __cplusplus -} -#endif -#endif /* !Py_INTERPRETERIDOBJECT_H */ diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index fe5e19d46d8b6c..55a1ab6d6d9359 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2207,132 +2207,264 @@ def test_module_state_shared_in_global(self): @requires_subinterpreters class InterpreterIDTests(unittest.TestCase): - InterpreterID = _testcapi.get_interpreterid_type() - - def new_interpreter(self): - def ensure_destroyed(interpid): + def add_interp_cleanup(self, interpid): + def ensure_destroyed(): try: _interpreters.destroy(interpid) except _interpreters.InterpreterNotFoundError: pass + self.addCleanup(ensure_destroyed) + + def new_interpreter(self): id = _interpreters.create() - self.addCleanup(lambda: ensure_destroyed(id)) + self.add_interp_cleanup(id) return id - def test_with_int(self): - id = self.InterpreterID(10, force=True) - - self.assertEqual(int(id), 10) + def test_conversion_int(self): + convert = 
_testinternalcapi.normalize_interp_id + interpid = convert(10) + self.assertEqual(interpid, 10) - def test_coerce_id(self): - class Int(str): + def test_conversion_coerced(self): + convert = _testinternalcapi.normalize_interp_id + class MyInt(str): def __index__(self): return 10 + interpid = convert(MyInt()) + self.assertEqual(interpid, 10) - id = self.InterpreterID(Int(), force=True) - self.assertEqual(int(id), 10) + def test_conversion_from_interpreter(self): + convert = _testinternalcapi.normalize_interp_id + interpid = self.new_interpreter() + converted = convert(interpid) + self.assertEqual(converted, interpid) + + def test_conversion_bad(self): + convert = _testinternalcapi.normalize_interp_id - def test_bad_id(self): for badid in [ object(), 10.0, '10', b'10', ]: - with self.subTest(badid): + with self.subTest(f'bad: {badid!r}'): with self.assertRaises(TypeError): - self.InterpreterID(badid) + convert(badid) badid = -1 - with self.subTest(badid): + with self.subTest(f'bad: {badid!r}'): with self.assertRaises(ValueError): - self.InterpreterID(badid) + convert(badid) badid = 2**64 - with self.subTest(badid): + with self.subTest(f'bad: {badid!r}'): with self.assertRaises(OverflowError): - self.InterpreterID(badid) + convert(badid) - def test_exists(self): - id = self.new_interpreter() - with self.assertRaises(_interpreters.InterpreterNotFoundError): - self.InterpreterID(int(id) + 1) # unforced + def test_lookup_exists(self): + interpid = self.new_interpreter() + self.assertTrue( + _testinternalcapi.interpreter_exists(interpid)) - def test_does_not_exist(self): - id = self.new_interpreter() - with self.assertRaises(_interpreters.InterpreterNotFoundError): - self.InterpreterID(int(id) + 1) # unforced + def test_lookup_does_not_exist(self): + interpid = _testinternalcapi.unused_interpreter_id() + self.assertFalse( + _testinternalcapi.interpreter_exists(interpid)) - def test_destroyed(self): - id = _interpreters.create() - _interpreters.destroy(id) - with 
self.assertRaises(_interpreters.InterpreterNotFoundError): - self.InterpreterID(id) # unforced - - def test_str(self): - id = self.InterpreterID(10, force=True) - self.assertEqual(str(id), '10') - - def test_repr(self): - id = self.InterpreterID(10, force=True) - self.assertEqual(repr(id), 'InterpreterID(10)') - - def test_equality(self): - id1 = self.new_interpreter() - id2 = self.InterpreterID(id1) - id3 = self.InterpreterID( - self.new_interpreter()) - - self.assertTrue(id2 == id2) # identity - self.assertTrue(id2 == id1) # int-equivalent - self.assertTrue(id1 == id2) # reversed - self.assertTrue(id2 == int(id2)) - self.assertTrue(id2 == float(int(id2))) - self.assertTrue(float(int(id2)) == id2) - self.assertFalse(id2 == float(int(id2)) + 0.1) - self.assertFalse(id2 == str(int(id2))) - self.assertFalse(id2 == 2**1000) - self.assertFalse(id2 == float('inf')) - self.assertFalse(id2 == 'spam') - self.assertFalse(id2 == id3) - - self.assertFalse(id2 != id2) - self.assertFalse(id2 != id1) - self.assertFalse(id1 != id2) - self.assertTrue(id2 != id3) - - def test_linked_lifecycle(self): - id1 = _interpreters.create() - _testinternalcapi.unlink_interpreter_refcount(id1) + def test_lookup_destroyed(self): + interpid = _interpreters.create() + _interpreters.destroy(interpid) + self.assertFalse( + _testinternalcapi.interpreter_exists(interpid)) + + def test_linked_lifecycle_does_not_exist(self): + exists = _testinternalcapi.interpreter_exists + is_linked = _testinternalcapi.interpreter_refcount_linked + link = _testinternalcapi.link_interpreter_refcount + unlink = _testinternalcapi.unlink_interpreter_refcount + get_refcount = _testinternalcapi.get_interpreter_refcount + incref = _testinternalcapi.interpreter_incref + decref = _testinternalcapi.interpreter_decref + + with self.subTest('never existed'): + interpid = _testinternalcapi.unused_interpreter_id() + self.assertFalse( + exists(interpid)) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + 
is_linked(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + link(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + unlink(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + get_refcount(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + incref(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + decref(interpid) + + with self.subTest('destroyed'): + interpid = _interpreters.create() + _interpreters.destroy(interpid) + self.assertFalse( + exists(interpid)) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + is_linked(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + link(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + unlink(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + get_refcount(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + incref(interpid) + with self.assertRaises(_interpreters.InterpreterNotFoundError): + decref(interpid) + + def test_linked_lifecycle_initial(self): + is_linked = _testinternalcapi.interpreter_refcount_linked + get_refcount = _testinternalcapi.get_interpreter_refcount + + # A new interpreter will start out not linked, with a refcount of 0. 
+ interpid = _testinternalcapi.new_interpreter() + self.add_interp_cleanup(interpid) + linked = is_linked(interpid) + refcount = get_refcount(interpid) + + self.assertFalse(linked) + self.assertEqual(refcount, 0) + + def test_linked_lifecycle_never_linked(self): + exists = _testinternalcapi.interpreter_exists + is_linked = _testinternalcapi.interpreter_refcount_linked + get_refcount = _testinternalcapi.get_interpreter_refcount + incref = _testinternalcapi.interpreter_incref + decref = _testinternalcapi.interpreter_decref + + interpid = _testinternalcapi.new_interpreter() + self.add_interp_cleanup(interpid) + + # Incref will not automatically link it. + incref(interpid) + self.assertFalse( + is_linked(interpid)) + self.assertEqual( + 1, get_refcount(interpid)) + + # It isn't linked so it isn't destroyed. + decref(interpid) + self.assertTrue( + exists(interpid)) + self.assertFalse( + is_linked(interpid)) + self.assertEqual( + 0, get_refcount(interpid)) + + def test_linked_lifecycle_link_unlink(self): + exists = _testinternalcapi.interpreter_exists + is_linked = _testinternalcapi.interpreter_refcount_linked + link = _testinternalcapi.link_interpreter_refcount + unlink = _testinternalcapi.unlink_interpreter_refcount + + interpid = _testinternalcapi.new_interpreter() + self.add_interp_cleanup(interpid) + + # Linking at refcount 0 does not destroy the interpreter. + link(interpid) + self.assertTrue( + exists(interpid)) + self.assertTrue( + is_linked(interpid)) + + # Unlinking at refcount 0 does not destroy the interpreter. 
+ unlink(interpid) + self.assertTrue( + exists(interpid)) + self.assertFalse( + is_linked(interpid)) + + def test_linked_lifecycle_link_incref_decref(self): + exists = _testinternalcapi.interpreter_exists + is_linked = _testinternalcapi.interpreter_refcount_linked + link = _testinternalcapi.link_interpreter_refcount + get_refcount = _testinternalcapi.get_interpreter_refcount + incref = _testinternalcapi.interpreter_incref + decref = _testinternalcapi.interpreter_decref + + interpid = _testinternalcapi.new_interpreter() + self.add_interp_cleanup(interpid) + + # Linking it will not change the refcount. + link(interpid) + self.assertTrue( + is_linked(interpid)) self.assertEqual( - _testinternalcapi.get_interpreter_refcount(id1), - 0) + 0, get_refcount(interpid)) - id2 = self.InterpreterID(id1) + # Decref with a refcount of 0 is not allowed. + incref(interpid) self.assertEqual( - _testinternalcapi.get_interpreter_refcount(id1), - 1) + 1, get_refcount(interpid)) - # The interpreter isn't linked to ID objects, so it isn't destroyed. - del id2 + # When linked, decref back to 0 destroys the interpreter. + decref(interpid) + self.assertFalse( + exists(interpid)) + + def test_linked_lifecycle_incref_link(self): + is_linked = _testinternalcapi.interpreter_refcount_linked + link = _testinternalcapi.link_interpreter_refcount + get_refcount = _testinternalcapi.get_interpreter_refcount + incref = _testinternalcapi.interpreter_incref + + interpid = _testinternalcapi.new_interpreter() + self.add_interp_cleanup(interpid) + + incref(interpid) self.assertEqual( - _testinternalcapi.get_interpreter_refcount(id1), - 0) + 1, get_refcount(interpid)) - _testinternalcapi.link_interpreter_refcount(id1) + # Linking it will not reset the refcount. 
+ link(interpid) + self.assertTrue( + is_linked(interpid)) self.assertEqual( - _testinternalcapi.get_interpreter_refcount(id1), - 0) + 1, get_refcount(interpid)) + + def test_linked_lifecycle_link_incref_unlink_decref(self): + exists = _testinternalcapi.interpreter_exists + is_linked = _testinternalcapi.interpreter_refcount_linked + link = _testinternalcapi.link_interpreter_refcount + unlink = _testinternalcapi.unlink_interpreter_refcount + get_refcount = _testinternalcapi.get_interpreter_refcount + incref = _testinternalcapi.interpreter_incref + decref = _testinternalcapi.interpreter_decref + + interpid = _testinternalcapi.new_interpreter() + self.add_interp_cleanup(interpid) - id3 = self.InterpreterID(id1) + link(interpid) + self.assertTrue( + is_linked(interpid)) + + incref(interpid) + self.assertEqual( + 1, get_refcount(interpid)) + + # Unlinking it will not change the refcount. + unlink(interpid) + self.assertFalse( + is_linked(interpid)) self.assertEqual( - _testinternalcapi.get_interpreter_refcount(id1), - 1) + 1, get_refcount(interpid)) - # The interpreter is linked now so is destroyed. - del id3 - with self.assertRaises(_interpreters.InterpreterNotFoundError): - _testinternalcapi.get_interpreter_refcount(id1) + # Unlinked: decref back to 0 does not destroy the interpreter.
+ decref(interpid) + self.assertTrue( + exists(interpid)) + self.assertEqual( + 0, get_refcount(interpid)) class BuiltinStaticTypesTests(unittest.TestCase): diff --git a/Makefile.pre.in b/Makefile.pre.in index cacf14a52cb68e..c454f31aae1e57 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -507,7 +507,6 @@ OBJECT_OBJS= \ Objects/floatobject.o \ Objects/frameobject.o \ Objects/funcobject.o \ - Objects/interpreteridobject.o \ Objects/iterobject.o \ Objects/listobject.o \ Objects/longobject.o \ @@ -1003,7 +1002,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/frameobject.h \ $(srcdir)/Include/genericaliasobject.h \ $(srcdir)/Include/import.h \ - $(srcdir)/Include/interpreteridobject.h \ $(srcdir)/Include/intrcheck.h \ $(srcdir)/Include/iterobject.h \ $(srcdir)/Include/listobject.h \ @@ -1077,7 +1075,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/cpython/genobject.h \ $(srcdir)/Include/cpython/import.h \ $(srcdir)/Include/cpython/initconfig.h \ - $(srcdir)/Include/cpython/interpreteridobject.h \ $(srcdir)/Include/cpython/listobject.h \ $(srcdir)/Include/cpython/longintrepr.h \ $(srcdir)/Include/cpython/longobject.h \ diff --git a/Modules/_interpreters_common.h b/Modules/_interpreters_common.h index 07120f6ccc7207..de9a60ce657e0c 100644 --- a/Modules/_interpreters_common.h +++ b/Modules/_interpreters_common.h @@ -19,3 +19,20 @@ clear_xid_class(PyTypeObject *cls) return _PyCrossInterpreterData_UnregisterClass(cls); } #endif + + +#ifdef RETURNS_INTERPID_OBJECT +static PyObject * +get_interpid_obj(PyInterpreterState *interp) +{ + if (_PyInterpreterState_IDInitref(interp) != 0) { + return NULL; + }; + int64_t id = PyInterpreterState_GetID(interp); + if (id < 0) { + return NULL; + } + assert(id < LLONG_MAX); + return PyLong_FromLongLong(id); +} +#endif diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index e68d083955d64a..16b5e1d257eed2 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -13,7 +13,6 @@ #include "_testcapi/parts.h" #include 
"frameobject.h" // PyFrame_New() -#include "interpreteridobject.h" // PyInterpreterID_Type #include "marshal.h" // PyMarshal_WriteLongToFile() #include // FLT_MAX @@ -1449,12 +1448,6 @@ run_in_subinterp(PyObject *self, PyObject *args) return PyLong_FromLong(r); } -static PyObject * -get_interpreterid_type(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ - return Py_NewRef(&PyInterpreterID_Type); -} - static PyMethodDef ml; static PyObject * @@ -3299,7 +3292,6 @@ static PyMethodDef TestMethods[] = { {"crash_no_current_thread", crash_no_current_thread, METH_NOARGS}, {"test_current_tstate_matches", test_current_tstate_matches, METH_NOARGS}, {"run_in_subinterp", run_in_subinterp, METH_VARARGS}, - {"get_interpreterid_type", get_interpreterid_type, METH_NOARGS}, {"create_cfunction", create_cfunction, METH_NOARGS}, {"call_in_temporary_c_thread", call_in_temporary_c_thread, METH_VARARGS, PyDoc_STR("set_error_class(error_class) -> None")}, diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index f73a29e5afe801..e1717f7a66b1de 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1475,6 +1475,83 @@ run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) } +static PyObject * +normalize_interp_id(PyObject *self, PyObject *idobj) +{ + int64_t interpid = _PyInterpreterState_ObjectToID(idobj); + if (interpid < 0) { + return NULL; + } + return PyLong_FromLongLong(interpid); +} + +static PyObject * +unused_interpreter_id(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + int64_t interpid = INT64_MAX; + assert(interpid > _PyRuntime.interpreters.next_id); + return PyLong_FromLongLong(interpid); +} + +static PyObject * +new_interpreter(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + // Unlike _interpreters.create(), we do not automatically link + // the interpreter to its refcount. 
+ PyThreadState *save_tstate = PyThreadState_Get(); + const PyInterpreterConfig config = \ + (PyInterpreterConfig)_PyInterpreterConfig_INIT; + PyThreadState *tstate = NULL; + PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config); + PyThreadState_Swap(save_tstate); + if (PyStatus_Exception(status)) { + _PyErr_SetFromPyStatus(status); + return NULL; + } + PyInterpreterState *interp = PyThreadState_GetInterpreter(tstate); + + if (_PyInterpreterState_IDInitref(interp) < 0) { + goto error; + } + + int64_t interpid = PyInterpreterState_GetID(interp); + if (interpid < 0) { + goto error; + } + PyObject *idobj = PyLong_FromLongLong(interpid); + if (idobj == NULL) { + goto error; + } + + PyThreadState_Swap(tstate); + PyThreadState_Clear(tstate); + PyThreadState_Swap(save_tstate); + PyThreadState_Delete(tstate); + + return idobj; + +error: + save_tstate = PyThreadState_Swap(tstate); + Py_EndInterpreter(tstate); + PyThreadState_Swap(save_tstate); + return NULL; +} + +static PyObject * +interpreter_exists(PyObject *self, PyObject *idobj) +{ + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(idobj); + if (interp == NULL) { + if (PyErr_ExceptionMatches(PyExc_InterpreterNotFoundError)) { + PyErr_Clear(); + Py_RETURN_FALSE; + } + assert(PyErr_Occurred()); + return NULL; + } + Py_RETURN_TRUE; +} + static PyObject * get_interpreter_refcount(PyObject *self, PyObject *idobj) { @@ -1509,6 +1586,41 @@ unlink_interpreter_refcount(PyObject *self, PyObject *idobj) Py_RETURN_NONE; } +static PyObject * +interpreter_refcount_linked(PyObject *self, PyObject *idobj) +{ + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(idobj); + if (interp == NULL) { + return NULL; + } + if (_PyInterpreterState_RequiresIDRef(interp)) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + +static PyObject * +interpreter_incref(PyObject *self, PyObject *idobj) +{ + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(idobj); + if (interp == NULL) { + return NULL; + } + 
_PyInterpreterState_IDIncref(interp); + Py_RETURN_NONE; +} + +static PyObject * +interpreter_decref(PyObject *self, PyObject *idobj) +{ + PyInterpreterState *interp = _PyInterpreterState_LookUpIDObject(idobj); + if (interp == NULL) { + return NULL; + } + _PyInterpreterState_IDDecref(interp); + Py_RETURN_NONE; +} + static void _xid_capsule_destructor(PyObject *capsule) @@ -1749,9 +1861,16 @@ static PyMethodDef module_functions[] = { {"run_in_subinterp_with_config", _PyCFunction_CAST(run_in_subinterp_with_config), METH_VARARGS | METH_KEYWORDS}, + {"normalize_interp_id", normalize_interp_id, METH_O}, + {"unused_interpreter_id", unused_interpreter_id, METH_NOARGS}, + {"new_interpreter", new_interpreter, METH_NOARGS}, + {"interpreter_exists", interpreter_exists, METH_O}, {"get_interpreter_refcount", get_interpreter_refcount, METH_O}, {"link_interpreter_refcount", link_interpreter_refcount, METH_O}, {"unlink_interpreter_refcount", unlink_interpreter_refcount, METH_O}, + {"interpreter_refcount_linked", interpreter_refcount_linked, METH_O}, + {"interpreter_incref", interpreter_incref, METH_O}, + {"interpreter_decref", interpreter_decref, METH_O}, {"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS}, {"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS}, {"get_crossinterp_data", get_crossinterp_data, METH_VARARGS}, diff --git a/Modules/_xxinterpchannelsmodule.c b/Modules/_xxinterpchannelsmodule.c index 28ec00a159d6cd..b63a3aab8263bc 100644 --- a/Modules/_xxinterpchannelsmodule.c +++ b/Modules/_xxinterpchannelsmodule.c @@ -6,7 +6,6 @@ #endif #include "Python.h" -#include "interpreteridobject.h" #include "pycore_crossinterp.h" // struct _xid #include "pycore_interp.h" // _PyInterpreterState_LookUpID() @@ -18,7 +17,9 @@ #endif #define REGISTERS_HEAP_TYPES +#define RETURNS_INTERPID_OBJECT #include "_interpreters_common.h" +#undef RETURNS_INTERPID_OBJECT #undef REGISTERS_HEAP_TYPES @@ -2908,7 +2909,7 @@ 
channelsmod_list_interpreters(PyObject *self, PyObject *args, PyObject *kwds) goto except; } if (res) { - interpid_obj = PyInterpreterState_GetIDObject(interp); + interpid_obj = get_interpid_obj(interp); if (interpid_obj == NULL) { goto except; } diff --git a/Modules/_xxsubinterpretersmodule.c b/Modules/_xxsubinterpretersmodule.c index 606b2a36481ce2..befa225c9183c5 100644 --- a/Modules/_xxsubinterpretersmodule.c +++ b/Modules/_xxsubinterpretersmodule.c @@ -16,10 +16,11 @@ #include "pycore_pyerrors.h" // _Py_excinfo #include "pycore_pystate.h" // _PyInterpreterState_SetRunningMain() -#include "interpreteridobject.h" #include "marshal.h" // PyMarshal_ReadObjectFromString() +#define RETURNS_INTERPID_OBJECT #include "_interpreters_common.h" +#undef RETURNS_INTERPID_OBJECT #define MODULE_NAME _xxsubinterpreters @@ -38,20 +39,6 @@ _get_current_interp(void) #define look_up_interp _PyInterpreterState_LookUpIDObject -static PyObject * -get_interpid_obj(PyInterpreterState *interp) -{ - if (_PyInterpreterState_IDInitref(interp) != 0) { - return NULL; - }; - int64_t id = PyInterpreterState_GetID(interp); - if (id < 0) { - return NULL; - } - assert(id < LLONG_MAX); - return PyLong_FromLongLong(id); -} - static PyObject * _get_current_module(void) { diff --git a/Objects/interpreteridobject.c b/Objects/interpreteridobject.c deleted file mode 100644 index 4844d6a9bf781c..00000000000000 --- a/Objects/interpreteridobject.c +++ /dev/null @@ -1,274 +0,0 @@ -/* InterpreterID object */ - -#include "Python.h" -#include "pycore_interp.h" // _PyInterpreterState_LookUpID() -#include "interpreteridobject.h" - - -typedef struct interpid { - PyObject_HEAD - int64_t id; -} interpid; - -int64_t -_PyInterpreterID_GetID(PyObject *self) -{ - if (!PyObject_TypeCheck(self, &PyInterpreterID_Type)) { - PyErr_Format(PyExc_TypeError, - "expected an InterpreterID, got %R", - self); - return -1; - - } - int64_t id = ((interpid *)self)->id; - assert(id >= 0); - return id; -} - -static interpid * 
-newinterpid(PyTypeObject *cls, int64_t id, int force) -{ - PyInterpreterState *interp = _PyInterpreterState_LookUpID(id); - if (interp == NULL) { - if (force) { - PyErr_Clear(); - } - else { - return NULL; - } - } - - if (interp != NULL) { - if (_PyInterpreterState_IDIncref(interp) < 0) { - return NULL; - } - } - - interpid *self = PyObject_New(interpid, cls); - if (self == NULL) { - if (interp != NULL) { - _PyInterpreterState_IDDecref(interp); - } - return NULL; - } - self->id = id; - - return self; -} - -static PyObject * -interpid_new(PyTypeObject *cls, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"id", "force", NULL}; - PyObject *idobj; - int force = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O|$p:InterpreterID.__init__", kwlist, - &idobj, &force)) { - return NULL; - } - int64_t id = _PyInterpreterState_ObjectToID(idobj); - if (id < 0) { - return NULL; - } - - return (PyObject *)newinterpid(cls, id, force); -} - -static void -interpid_dealloc(PyObject *v) -{ - int64_t id = ((interpid *)v)->id; - PyInterpreterState *interp = _PyInterpreterState_LookUpID(id); - if (interp != NULL) { - _PyInterpreterState_IDDecref(interp); - } - else { - // already deleted - PyErr_Clear(); - } - Py_TYPE(v)->tp_free(v); -} - -static PyObject * -interpid_repr(PyObject *self) -{ - PyTypeObject *type = Py_TYPE(self); - const char *name = _PyType_Name(type); - interpid *id = (interpid *)self; - return PyUnicode_FromFormat("%s(%" PRId64 ")", name, id->id); -} - -static PyObject * -interpid_str(PyObject *self) -{ - interpid *id = (interpid *)self; - return PyUnicode_FromFormat("%" PRId64 "", id->id); -} - -static PyObject * -interpid_int(PyObject *self) -{ - interpid *id = (interpid *)self; - return PyLong_FromLongLong(id->id); -} - -static PyNumberMethods interpid_as_number = { - 0, /* nb_add */ - 0, /* nb_subtract */ - 0, /* nb_multiply */ - 0, /* nb_remainder */ - 0, /* nb_divmod */ - 0, /* nb_power */ - 0, /* nb_negative */ - 0, /* nb_positive */ - 0, /* 
nb_absolute */ - 0, /* nb_bool */ - 0, /* nb_invert */ - 0, /* nb_lshift */ - 0, /* nb_rshift */ - 0, /* nb_and */ - 0, /* nb_xor */ - 0, /* nb_or */ - (unaryfunc)interpid_int, /* nb_int */ - 0, /* nb_reserved */ - 0, /* nb_float */ - - 0, /* nb_inplace_add */ - 0, /* nb_inplace_subtract */ - 0, /* nb_inplace_multiply */ - 0, /* nb_inplace_remainder */ - 0, /* nb_inplace_power */ - 0, /* nb_inplace_lshift */ - 0, /* nb_inplace_rshift */ - 0, /* nb_inplace_and */ - 0, /* nb_inplace_xor */ - 0, /* nb_inplace_or */ - - 0, /* nb_floor_divide */ - 0, /* nb_true_divide */ - 0, /* nb_inplace_floor_divide */ - 0, /* nb_inplace_true_divide */ - - (unaryfunc)interpid_int, /* nb_index */ -}; - -static Py_hash_t -interpid_hash(PyObject *self) -{ - interpid *id = (interpid *)self; - PyObject *obj = PyLong_FromLongLong(id->id); - if (obj == NULL) { - return -1; - } - Py_hash_t hash = PyObject_Hash(obj); - Py_DECREF(obj); - return hash; -} - -static PyObject * -interpid_richcompare(PyObject *self, PyObject *other, int op) -{ - if (op != Py_EQ && op != Py_NE) { - Py_RETURN_NOTIMPLEMENTED; - } - - if (!PyObject_TypeCheck(self, &PyInterpreterID_Type)) { - Py_RETURN_NOTIMPLEMENTED; - } - - interpid *id = (interpid *)self; - int equal; - if (PyObject_TypeCheck(other, &PyInterpreterID_Type)) { - interpid *otherid = (interpid *)other; - equal = (id->id == otherid->id); - } - else if (PyLong_CheckExact(other)) { - /* Fast path */ - int overflow; - long long otherid = PyLong_AsLongLongAndOverflow(other, &overflow); - if (otherid == -1 && PyErr_Occurred()) { - return NULL; - } - equal = !overflow && (otherid >= 0) && (id->id == otherid); - } - else if (PyNumber_Check(other)) { - PyObject *pyid = PyLong_FromLongLong(id->id); - if (pyid == NULL) { - return NULL; - } - PyObject *res = PyObject_RichCompare(pyid, other, op); - Py_DECREF(pyid); - return res; - } - else { - Py_RETURN_NOTIMPLEMENTED; - } - - if ((op == Py_EQ && equal) || (op == Py_NE && !equal)) { - Py_RETURN_TRUE; - } - 
Py_RETURN_FALSE; -} - -PyDoc_STRVAR(interpid_doc, -"A interpreter ID identifies a interpreter and may be used as an int."); - -PyTypeObject PyInterpreterID_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "InterpreterID", /* tp_name */ - sizeof(interpid), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)interpid_dealloc, /* tp_dealloc */ - 0, /* tp_vectorcall_offset */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_as_async */ - (reprfunc)interpid_repr, /* tp_repr */ - &interpid_as_number, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - interpid_hash, /* tp_hash */ - 0, /* tp_call */ - (reprfunc)interpid_str, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - interpid_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - interpid_richcompare, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - interpid_new, /* tp_new */ -}; - -PyObject *PyInterpreterID_New(int64_t id) -{ - return (PyObject *)newinterpid(&PyInterpreterID_Type, id, 0); -} - -PyObject * -PyInterpreterState_GetIDObject(PyInterpreterState *interp) -{ - if (_PyInterpreterState_IDInitref(interp) != 0) { - return NULL; - }; - int64_t id = PyInterpreterState_GetID(interp); - if (id < 0) { - return NULL; - } - return (PyObject *)newinterpid(&PyInterpreterID_Type, id, 0); -} diff --git a/Objects/object.c b/Objects/object.c index 0d03292c9115cd..b4f0fd4d7db941 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -24,8 +24,6 @@ #include "pycore_typevarobject.h" // _PyTypeAlias_Type, _Py_initialize_generic #include "pycore_unionobject.h" // _PyUnion_Type -#include "interpreteridobject.h" // _PyInterpreterID_Type - 
#ifdef Py_LIMITED_API // Prevent recursive call _Py_IncRef() <=> Py_INCREF() # error "Py_LIMITED_API macro must not be defined" @@ -2240,7 +2238,6 @@ static PyTypeObject* static_types[] = { &PyGen_Type, &PyGetSetDescr_Type, &PyInstanceMethod_Type, - &PyInterpreterID_Type, &PyListIter_Type, &PyListRevIter_Type, &PyList_Type, diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index bce92c91f1ca0d..82471e0f140ec3 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -142,7 +142,6 @@ - diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 5b34440af9322b..97c52fdadf7c05 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -241,9 +241,6 @@ Source Files - - Source Files - Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 9131ce87db6c84..c944bbafdba7e5 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -154,7 +154,6 @@ - @@ -303,7 +302,6 @@ - @@ -504,7 +502,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 27bd1121663398..0afad125ce1e97 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -330,9 +330,6 @@ Include - - Include - Modules @@ -492,9 +489,6 @@ Include - - Include\cpython - Include\cpython @@ -1475,9 +1469,6 @@ Objects - - Objects - Modules diff --git a/Python/pystate.c b/Python/pystate.c index 6d63eac22a470c..3ef405105a8d46 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2,7 +2,6 @@ /* Thread and interpreter state structures and their interfaces */ #include "Python.h" -#include "interpreteridobject.h" // PyInterpreterID_Type #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_ceval.h" #include "pycore_code.h" // stats @@ -1131,10 +1130,6 @@ PyInterpreterState_GetDict(PyInterpreterState *interp) int64_t _PyInterpreterState_ObjectToID(PyObject 
*idobj) { - if (PyObject_TypeCheck(idobj, &PyInterpreterID_Type)) { - return _PyInterpreterID_GetID(idobj); - } - if (!_PyIndex_Check(idobj)) { PyErr_Format(PyExc_TypeError, "interpreter ID must be an int, got %.100s", diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 686a3d3160cc90..92fab9b3998636 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -54,7 +54,6 @@ Objects/genobject.c - _PyAsyncGenASend_Type - Objects/genobject.c - _PyAsyncGenAThrow_Type - Objects/genobject.c - _PyAsyncGenWrappedValue_Type - Objects/genobject.c - _PyCoroWrapper_Type - -Objects/interpreteridobject.c - PyInterpreterID_Type - Objects/iterobject.c - PyCallIter_Type - Objects/iterobject.c - PySeqIter_Type - Objects/iterobject.c - _PyAnextAwaitable_Type - From 50369e6c34d05222e5a0ec9443a9f7b230e83112 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 21 Mar 2024 13:27:46 -0400 Subject: [PATCH 06/39] gh-116996: Add pystats about _Py_uop_analyse_and_optimize (GH-116997) --- Include/cpython/pystats.h | 7 ++++++- Include/internal/pycore_code.h | 2 ++ Python/optimizer_analysis.c | 11 +++++++++-- Python/specialize.c | 13 +++++++++++++ Tools/scripts/summarize_stats.py | 26 ++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 3 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 887fbbedf88502..5bf7bacd514699 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -19,6 +19,8 @@ // Define _PY_INTERPRETER macro to increment interpreter_increfs and // interpreter_decrefs. Otherwise, increment increfs and decrefs. 
+#include "pycore_uop_ids.h" + #ifndef Py_CPYTHON_PYSTATS_H # error "this header file must not be included directly" #endif @@ -116,7 +118,7 @@ typedef struct _optimization_stats { uint64_t recursive_call; uint64_t low_confidence; uint64_t executors_invalidated; - UOpStats opcode[512]; + UOpStats opcode[MAX_UOP_ID]; uint64_t unsupported_opcode[256]; uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE]; @@ -124,6 +126,9 @@ typedef struct _optimization_stats { uint64_t optimizer_attempts; uint64_t optimizer_successes; uint64_t optimizer_failure_reason_no_memory; + uint64_t remove_globals_builtins_changed; + uint64_t remove_globals_incorrect_keys; + uint64_t error_in_opcode[MAX_UOP_ID]; } OptimizationStats; typedef struct _rare_event_stats { diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 8eabd49a18afa9..e004783ee48198 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -308,6 +308,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co); #define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0) #define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0) #define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0) +#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0) #define OPT_HIST(length, name) \ do { \ if (_Py_stats) { \ @@ -334,6 +335,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define OPT_STAT_INC(name) ((void)0) #define UOP_STAT_INC(opname, name) ((void)0) #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) +#define OPT_ERROR_IN_OPCODE(opname) ((void)0) #define OPT_HIST(length, name) ((void)0) #define RARE_EVENT_STAT_INC(name) ((void)0) #endif // !Py_STATS diff --git 
a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 603ac6815665ca..6c460c5359d71e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -139,6 +139,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *builtins = frame->f_builtins; if (builtins != interp->builtins) { + OPT_STAT_INC(remove_globals_builtins_changed); return 1; } PyObject *globals = frame->f_globals; @@ -170,6 +171,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, switch(opcode) { case _GUARD_BUILTINS_VERSION: if (incorrect_keys(inst, builtins)) { + OPT_STAT_INC(remove_globals_incorrect_keys); return 0; } if (interp->rare_events.builtin_dict >= _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS) { @@ -190,6 +192,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, break; case _GUARD_GLOBALS_VERSION: if (incorrect_keys(inst, globals)) { + OPT_STAT_INC(remove_globals_incorrect_keys); return 0; } uint64_t watched_mutations = get_mutations(globals); @@ -238,6 +241,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, globals = func->func_globals; builtins = func->func_builtins; if (builtins != interp->builtins) { + OPT_STAT_INC(remove_globals_builtins_changed); return 1; } break; @@ -358,6 +362,7 @@ optimize_uops( _Py_UOpsContext context; _Py_UOpsContext *ctx = &context; + uint32_t opcode = UINT16_MAX; if (_Py_uop_abstractcontext_init(ctx) < 0) { goto out_of_space; @@ -374,8 +379,7 @@ optimize_uops( this_instr++) { int oparg = this_instr->oparg; - uint32_t opcode = this_instr->opcode; - + opcode = this_instr->opcode; _Py_UopsSymbol **stack_pointer = ctx->frame->stack_pointer; #ifdef Py_DEBUG @@ -410,6 +414,9 @@ optimize_uops( error: DPRINTF(3, "\n"); DPRINTF(1, "Encountered error in abstract interpreter\n"); + if (opcode <= MAX_UOP_ID) { + OPT_ERROR_IN_OPCODE(opcode); + } _Py_uop_abstractcontext_fini(ctx); return -1; diff 
--git a/Python/specialize.c b/Python/specialize.c index b1f9eb756c3665..801ab1f2e64e5d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -247,6 +247,8 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes); fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n", stats->optimizer_failure_reason_no_memory); + fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed); + fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys); const char* const* names; for (int i = 0; i <= MAX_UOP_ID; i++) { @@ -268,6 +270,17 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) ); } } + + for (int i = 0; i < MAX_UOP_ID; i++) { + if (stats->error_in_opcode[i]) { + fprintf( + out, + "error_in_opcode[%s].count : %" PRIu64 "\n", + _PyUOpName(i), + stats->error_in_opcode[i] + ); + } + } } static void diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 6af14e1b769b80..d40106b8682388 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -513,6 +513,8 @@ def get_optimizer_stats(self) -> dict[str, tuple[int, int | None]]: attempts = self._data["Optimization optimizer attempts"] successes = self._data["Optimization optimizer successes"] no_memory = self._data["Optimization optimizer failure no memory"] + builtins_changed = self._data["Optimizer remove globals builtins changed"] + incorrect_keys = self._data["Optimizer remove globals incorrect keys"] return { Doc( @@ -527,6 +529,14 @@ def get_optimizer_stats(self) -> dict[str, tuple[int, int | None]]: "Optimizer no memory", "The number of optimizations that failed due to no memory.", ): (no_memory, attempts), + Doc( + "Remove globals builtins changed", + "The builtins changed during optimization", + ): (builtins_changed, attempts), + 
Doc( + "Remove globals incorrect keys", + "The keys in the globals dictionary aren't what was expected", + ): (incorrect_keys, attempts), } def get_histogram(self, prefix: str) -> list[tuple[int, int]]: @@ -1177,6 +1187,17 @@ def calc_unsupported_opcodes_table(stats: Stats) -> Rows: reverse=True, ) + def calc_error_in_opcodes_table(stats: Stats) -> Rows: + error_in_opcodes = stats.get_opcode_stats("error_in_opcode") + return sorted( + [ + (opcode, Count(count)) + for opcode, count in error_in_opcodes.get_opcode_counts().items() + ], + key=itemgetter(1), + reverse=True, + ) + def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None): if not base_stats.get_optimization_stats() or ( head_stats is not None and not head_stats.get_optimization_stats() @@ -1223,6 +1244,11 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) ) ], ) + yield Section( + "Optimizer errored out with opcode", + "Optimization stopped after encountering this opcode", + [Table(("Opcode", "Count:"), calc_error_in_opcodes_table, JoinMode.CHANGE)], + ) return Section( "Optimization (Tier 2) stats", From 1f72fb5447ef3f8892b4a7a6213522579c618e8e Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 21 Mar 2024 14:21:02 -0400 Subject: [PATCH 07/39] gh-116522: Refactor `_PyThreadState_DeleteExcept` (#117131) Split `_PyThreadState_DeleteExcept` into two functions: - `_PyThreadState_RemoveExcept` removes all thread states other than one passed as an argument. It returns the removed thread states as a linked list. - `_PyThreadState_DeleteList` deletes those dead thread states. It may call destructors, so we want to "start the world" before calling `_PyThreadState_DeleteList` to avoid potential deadlocks. 
--- Include/internal/pycore_pystate.h | 3 ++- Modules/posixmodule.c | 8 +++++++ Python/ceval_gil.c | 7 ++---- Python/pylifecycle.c | 7 ++++-- Python/pystate.c | 39 +++++++++++++++++++------------ 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 9aa439229cc8ea..35e266acd3ab60 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -218,7 +218,8 @@ extern PyThreadState * _PyThreadState_New( PyInterpreterState *interp, int whence); extern void _PyThreadState_Bind(PyThreadState *tstate); -extern void _PyThreadState_DeleteExcept(PyThreadState *tstate); +extern PyThreadState * _PyThreadState_RemoveExcept(PyThreadState *tstate); +extern void _PyThreadState_DeleteList(PyThreadState *list); extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate); // Export for '_testinternalcapi' shared extension diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 88679164fc3aab..a4b635ef5bf629 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -664,6 +664,14 @@ PyOS_AfterFork_Child(void) goto fatal_error; } + // Remove the dead thread states. We "start the world" once we are the only + // thread state left to undo the stop the world call in `PyOS_BeforeFork`. + // That needs to happen before `_PyThreadState_DeleteList`, because that + // may call destructors. 
+ PyThreadState *list = _PyThreadState_RemoveExcept(tstate); + _PyEval_StartTheWorldAll(&_PyRuntime); + _PyThreadState_DeleteList(list); + status = _PyImport_ReInitLock(tstate->interp); if (_PyStatus_EXCEPTION(status)) { goto fatal_error; diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 78c13d619e6ee0..d88ac65c5cf300 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -579,9 +579,8 @@ PyEval_ReleaseThread(PyThreadState *tstate) } #ifdef HAVE_FORK -/* This function is called from PyOS_AfterFork_Child to destroy all threads - which are not running in the child process, and clear internal locks - which might be held by those threads. */ +/* This function is called from PyOS_AfterFork_Child to re-initialize the + GIL and pending calls lock. */ PyStatus _PyEval_ReInitThreads(PyThreadState *tstate) { @@ -598,8 +597,6 @@ _PyEval_ReInitThreads(PyThreadState *tstate) struct _pending_calls *pending = &tstate->interp->ceval.pending; _PyMutex_at_fork_reinit(&pending->mutex); - /* Destroy all threads except the current one */ - _PyThreadState_DeleteExcept(tstate); return _PyStatus_OK(); } #endif diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 683534d342f437..1d315b80d88ce0 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1934,8 +1934,11 @@ Py_FinalizeEx(void) will be called in the current Python thread. Since _PyRuntimeState_SetFinalizing() has been called, no other Python thread can take the GIL at this point: if they try, they will exit - immediately. */ - _PyThreadState_DeleteExcept(tstate); + immediately. We start the world once we are the only thread state left, + before we call destructors. */ + PyThreadState *list = _PyThreadState_RemoveExcept(tstate); + _PyEval_StartTheWorldAll(runtime); + _PyThreadState_DeleteList(list); /* At this point no Python code should be running at all. 
The only thread state left should be the main thread of the main diff --git a/Python/pystate.c b/Python/pystate.c index 3ef405105a8d46..47d327ae28933b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1763,15 +1763,17 @@ PyThreadState_DeleteCurrent(void) } -/* - * Delete all thread states except the one passed as argument. - * Note that, if there is a current thread state, it *must* be the one - * passed as argument. Also, this won't touch any other interpreters - * than the current one, since we don't know which thread state should - * be kept in those other interpreters. - */ -void -_PyThreadState_DeleteExcept(PyThreadState *tstate) +// Unlinks and removes all thread states from `tstate->interp`, with the +// exception of the one passed as an argument. However, it does not delete +// these thread states. Instead, it returns the removed thread states as a +// linked list. +// +// Note that if there is a current thread state, it *must* be the one +// passed as argument. Also, this won't touch any interpreters other +// than the current one, since we don't know which thread state should +// be kept in those other interpreters. +PyThreadState * +_PyThreadState_RemoveExcept(PyThreadState *tstate) { assert(tstate != NULL); PyInterpreterState *interp = tstate->interp; @@ -1783,8 +1785,7 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) HEAD_LOCK(runtime); /* Remove all thread states, except tstate, from the linked list of - thread states. This will allow calling PyThreadState_Clear() - without holding the lock. */ + thread states. */ PyThreadState *list = interp->threads.head; if (list == tstate) { list = tstate->next; @@ -1799,11 +1800,19 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) interp->threads.head = tstate; HEAD_UNLOCK(runtime); - _PyEval_StartTheWorldAll(runtime); + return list; +} + +// Deletes the thread states in the linked list `list`. +// +// This is intended to be used in conjunction with _PyThreadState_RemoveExcept. 
+void +_PyThreadState_DeleteList(PyThreadState *list) +{ + // The world can't be stopped because PyThreadState_Clear() can + // call destructors. + assert(!_PyRuntime.stoptheworld.world_stopped); - /* Clear and deallocate all stale thread states. Even if this - executes Python code, we should be safe since it executes - in the current thread, not one of the stale threads. */ PyThreadState *p, *next; for (p = list; p; p = next) { next = p->next; From c85d84166a84a5cb2d724012726bad34229ad24e Mon Sep 17 00:00:00 2001 From: Will Childs-Klein Date: Thu, 21 Mar 2024 14:16:36 -0500 Subject: [PATCH 08/39] gh-116333: Relax error string text expectations in SSL-related tests (GH-116334) * Relax error string text expectations in SSL-related tests As suggested [here][1], this change relaxes the OpenSSL error string text expectations in a number of tests. This was specifically done in support of more easily building CPython against [AWS-LC][2], but because AWS-LC is a fork of [BoringSSL][3], it should increase compatibility with that library as well. In addition to the error string relaxations, we also add some guards around the `tls-unique` channel binding being used with TLSv1.3, as that feature (described in [RFC 5929][4]) is [not defined][5] for TLSv1.3. 
[1]: https://discuss.python.org/t/support-building-ssl-and-hashlib-modules-against-aws-lc/44505/4 [2]: https://github.com/aws/aws-lc [3]: https://github.com/google/boringssl [4]: https://datatracker.ietf.org/doc/html/rfc5929#section-3 [5]: https://datatracker.ietf.org/doc/html/rfc8446#appendix-C.5 --- Lib/test/test_asyncio/test_events.py | 12 ++- Lib/test/test_imaplib.py | 22 +++-- Lib/test/test_ssl.py | 97 +++++++++++++------ ...-03-21-11-32-29.gh-issue-116333.F-9Ram.rst | 3 + 4 files changed, 96 insertions(+), 38 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-03-21-11-32-29.gh-issue-116333.F-9Ram.rst diff --git a/Lib/test/test_asyncio/test_events.py b/Lib/test/test_asyncio/test_events.py index 5b9c871e1d1b5a..ae0bff06479093 100644 --- a/Lib/test/test_asyncio/test_events.py +++ b/Lib/test/test_asyncio/test_events.py @@ -1125,12 +1125,16 @@ def test_create_server_ssl_match_failed(self): # incorrect server_hostname f_c = self.loop.create_connection(MyProto, host, port, ssl=sslcontext_client) + + # Allow for flexible libssl error messages. 
+ regex = re.compile(r"""( + IP address mismatch, certificate is not valid for '127.0.0.1' # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) with mock.patch.object(self.loop, 'call_exception_handler'): with test_utils.disable_logger(): - with self.assertRaisesRegex( - ssl.CertificateError, - "IP address mismatch, certificate is not valid for " - "'127.0.0.1'"): + with self.assertRaisesRegex(ssl.CertificateError, regex): self.loop.run_until_complete(f_c) # close connection diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index b97474acca370f..79bf7dbdbb81a0 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -8,6 +8,7 @@ import time import calendar import threading +import re import socket from test.support import verbose, run_with_tz, run_with_locale, cpython_only, requires_resource @@ -558,9 +559,13 @@ def test_ssl_raises(self): self.assertEqual(ssl_context.check_hostname, True) ssl_context.load_verify_locations(CAFILE) - with self.assertRaisesRegex(ssl.CertificateError, - "IP address mismatch, certificate is not valid for " - "'127.0.0.1'"): + # Allow for flexible libssl error messages. + regex = re.compile(r"""( + IP address mismatch, certificate is not valid for '127.0.0.1' # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) + with self.assertRaisesRegex(ssl.CertificateError, regex): _, server = self._setup(SimpleIMAPHandler) client = self.imap_class(*server.server_address, ssl_context=ssl_context) @@ -954,10 +959,13 @@ def test_ssl_verified(self): ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) ssl_context.load_verify_locations(CAFILE) - with self.assertRaisesRegex( - ssl.CertificateError, - "IP address mismatch, certificate is not valid for " - "'127.0.0.1'"): + # Allow for flexible libssl error messages. 
+ regex = re.compile(r"""( + IP address mismatch, certificate is not valid for '127.0.0.1' # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) + with self.assertRaisesRegex(ssl.CertificateError, regex): with self.reaped_server(SimpleIMAPHandler) as server: client = self.imap_class(*server.server_address, ssl_context=ssl_context) diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 489cb5e23ba57e..3fa806ddc2cde7 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -551,7 +551,7 @@ def test_openssl_version(self): else: openssl_ver = f"OpenSSL {major:d}.{minor:d}.{fix:d}" self.assertTrue( - s.startswith((openssl_ver, libressl_ver)), + s.startswith((openssl_ver, libressl_ver, "AWS-LC")), (s, t, hex(n)) ) @@ -1169,24 +1169,30 @@ def test_load_cert_chain(self): with self.assertRaises(OSError) as cm: ctx.load_cert_chain(NONEXISTINGCERT) self.assertEqual(cm.exception.errno, errno.ENOENT) - with self.assertRaisesRegex(ssl.SSLError, "PEM lib"): + with self.assertRaisesRegex(ssl.SSLError, "PEM (lib|routines)"): ctx.load_cert_chain(BADCERT) - with self.assertRaisesRegex(ssl.SSLError, "PEM lib"): + with self.assertRaisesRegex(ssl.SSLError, "PEM (lib|routines)"): ctx.load_cert_chain(EMPTYCERT) # Separate key and cert ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) ctx.load_cert_chain(ONLYCERT, ONLYKEY) ctx.load_cert_chain(certfile=ONLYCERT, keyfile=ONLYKEY) ctx.load_cert_chain(certfile=BYTES_ONLYCERT, keyfile=BYTES_ONLYKEY) - with self.assertRaisesRegex(ssl.SSLError, "PEM lib"): + with self.assertRaisesRegex(ssl.SSLError, "PEM (lib|routines)"): ctx.load_cert_chain(ONLYCERT) - with self.assertRaisesRegex(ssl.SSLError, "PEM lib"): + with self.assertRaisesRegex(ssl.SSLError, "PEM (lib|routines)"): ctx.load_cert_chain(ONLYKEY) - with self.assertRaisesRegex(ssl.SSLError, "PEM lib"): + with self.assertRaisesRegex(ssl.SSLError, "PEM (lib|routines)"): ctx.load_cert_chain(certfile=ONLYKEY, keyfile=ONLYCERT) # Mismatching key and cert ctx = 
ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) - with self.assertRaisesRegex(ssl.SSLError, "key values mismatch"): + # Allow for flexible libssl error messages. + regex = re.compile(r"""( + key values mismatch # OpenSSL + | + KEY_VALUES_MISMATCH # AWS-LC + )""", re.X) + with self.assertRaisesRegex(ssl.SSLError, regex): ctx.load_cert_chain(CAFILE_CACERT, ONLYKEY) # Password protected key and cert ctx.load_cert_chain(CERTFILE_PROTECTED, password=KEY_PASSWORD) @@ -1254,7 +1260,7 @@ def test_load_verify_locations(self): with self.assertRaises(OSError) as cm: ctx.load_verify_locations(NONEXISTINGCERT) self.assertEqual(cm.exception.errno, errno.ENOENT) - with self.assertRaisesRegex(ssl.SSLError, "PEM lib"): + with self.assertRaisesRegex(ssl.SSLError, "PEM (lib|routines)"): ctx.load_verify_locations(BADCERT) ctx.load_verify_locations(CERTFILE, CAPATH) ctx.load_verify_locations(CERTFILE, capath=BYTES_CAPATH) @@ -1662,9 +1668,10 @@ def test_lib_reason(self): with self.assertRaises(ssl.SSLError) as cm: ctx.load_dh_params(CERTFILE) self.assertEqual(cm.exception.library, 'PEM') - self.assertEqual(cm.exception.reason, 'NO_START_LINE') + regex = "(NO_START_LINE|UNSUPPORTED_PUBLIC_KEY_TYPE)" + self.assertRegex(cm.exception.reason, regex) s = str(cm.exception) - self.assertTrue(s.startswith("[PEM: NO_START_LINE] no start line"), s) + self.assertTrue("NO_START_LINE" in s, s) def test_subclass(self): # Check that the appropriate SSLError subclass is raised @@ -1844,7 +1851,13 @@ def test_connect_fail(self): s = test_wrap_socket(socket.socket(socket.AF_INET), cert_reqs=ssl.CERT_REQUIRED) self.addCleanup(s.close) - self.assertRaisesRegex(ssl.SSLError, "certificate verify failed", + # Allow for flexible libssl error messages. 
+ regex = re.compile(r"""( + certificate verify failed # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) + self.assertRaisesRegex(ssl.SSLError, regex, s.connect, self.server_addr) def test_connect_ex(self): @@ -1912,7 +1925,13 @@ def test_connect_with_context_fail(self): server_hostname=SIGNED_CERTFILE_HOSTNAME ) self.addCleanup(s.close) - self.assertRaisesRegex(ssl.SSLError, "certificate verify failed", + # Allow for flexible libssl error messages. + regex = re.compile(r"""( + certificate verify failed # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) + self.assertRaisesRegex(ssl.SSLError, regex, s.connect, self.server_addr) def test_connect_capath(self): @@ -2129,14 +2148,16 @@ def test_bio_handshake(self): self.assertIsNone(sslobj.version()) self.assertIsNone(sslobj.shared_ciphers()) self.assertRaises(ValueError, sslobj.getpeercert) - if 'tls-unique' in ssl.CHANNEL_BINDING_TYPES: + # tls-unique is not defined for TLSv1.3 + # https://datatracker.ietf.org/doc/html/rfc8446#appendix-C.5 + if 'tls-unique' in ssl.CHANNEL_BINDING_TYPES and sslobj.version() != "TLSv1.3": self.assertIsNone(sslobj.get_channel_binding('tls-unique')) self.ssl_io_loop(sock, incoming, outgoing, sslobj.do_handshake) self.assertTrue(sslobj.cipher()) self.assertIsNone(sslobj.shared_ciphers()) self.assertIsNotNone(sslobj.version()) self.assertTrue(sslobj.getpeercert()) - if 'tls-unique' in ssl.CHANNEL_BINDING_TYPES: + if 'tls-unique' in ssl.CHANNEL_BINDING_TYPES and sslobj.version() != "TLSv1.3": self.assertTrue(sslobj.get_channel_binding('tls-unique')) try: self.ssl_io_loop(sock, incoming, outgoing, sslobj.unwrap) @@ -2861,11 +2882,16 @@ def test_crl_check(self): client_context.verify_flags |= ssl.VERIFY_CRL_CHECK_LEAF server = ThreadedEchoServer(context=server_context, chatty=True) + # Allow for flexible libssl error messages. 
+ regex = re.compile(r"""( + certificate verify failed # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) with server: with client_context.wrap_socket(socket.socket(), server_hostname=hostname) as s: - with self.assertRaisesRegex(ssl.SSLError, - "certificate verify failed"): + with self.assertRaisesRegex(ssl.SSLError, regex): s.connect((HOST, server.port)) # now load a CRL file. The CRL file is signed by the CA. @@ -2896,12 +2922,16 @@ def test_check_hostname(self): # incorrect hostname should raise an exception server = ThreadedEchoServer(context=server_context, chatty=True) + # Allow for flexible libssl error messages. + regex = re.compile(r"""( + certificate verify failed # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) with server: with client_context.wrap_socket(socket.socket(), server_hostname="invalid") as s: - with self.assertRaisesRegex( - ssl.CertificateError, - "Hostname mismatch, certificate is not valid for 'invalid'."): + with self.assertRaisesRegex(ssl.CertificateError, regex): s.connect((HOST, server.port)) # missing server_hostname arg should cause an exception, too @@ -3137,7 +3167,7 @@ def test_wrong_cert_tls13(self): s.connect((HOST, server.port)) with self.assertRaisesRegex( ssl.SSLError, - 'alert unknown ca|EOF occurred' + 'alert unknown ca|EOF occurred|TLSV1_ALERT_UNKNOWN_CA' ): # TLS 1.3 perform client cert exchange after handshake s.write(b'data') @@ -3201,13 +3231,21 @@ def test_ssl_cert_verify_error(self): server_hostname=SIGNED_CERTFILE_HOSTNAME) as s: try: s.connect((HOST, server.port)) + self.fail("Expected connection failure") except ssl.SSLError as e: msg = 'unable to get local issuer certificate' self.assertIsInstance(e, ssl.SSLCertVerificationError) self.assertEqual(e.verify_code, 20) self.assertEqual(e.verify_message, msg) - self.assertIn(msg, repr(e)) - self.assertIn('certificate verify failed', repr(e)) + # Allow for flexible libssl error messages. 
+ regex = f"({msg}|CERTIFICATE_VERIFY_FAILED)" + self.assertRegex(repr(e), regex) + regex = re.compile(r"""( + certificate verify failed # OpenSSL + | + CERTIFICATE_VERIFY_FAILED # AWS-LC + )""", re.X) + self.assertRegex(repr(e), regex) def test_PROTOCOL_TLS(self): """Connecting to an SSLv23 server with various client options""" @@ -3739,7 +3777,7 @@ def test_no_shared_ciphers(self): server_hostname=hostname) as s: with self.assertRaises(OSError): s.connect((HOST, server.port)) - self.assertIn("no shared cipher", server.conn_errors[0]) + self.assertIn("NO_SHARED_CIPHER", server.conn_errors[0]) def test_version_basic(self): """ @@ -3827,7 +3865,7 @@ def test_min_max_version_mismatch(self): server_hostname=hostname) as s: with self.assertRaises(ssl.SSLError) as e: s.connect((HOST, server.port)) - self.assertIn("alert", str(e.exception)) + self.assertRegex("(alert|ALERT)", str(e.exception)) @requires_tls_version('SSLv3') def test_min_max_version_sslv3(self): @@ -3869,6 +3907,10 @@ def test_tls_unique_channel_binding(self): client_context, server_context, hostname = testing_context() + # tls-unique is not defined for TLSv1.3 + # https://datatracker.ietf.org/doc/html/rfc8446#appendix-C.5 + client_context.maximum_version = ssl.TLSVersion.TLSv1_2 + server = ThreadedEchoServer(context=server_context, chatty=True, connectionchatty=False) @@ -3969,7 +4011,7 @@ def test_dh_params(self): cipher = stats["cipher"][0] parts = cipher.split("-") if "ADH" not in parts and "EDH" not in parts and "DHE" not in parts: - self.fail("Non-DH cipher: " + cipher[0]) + self.fail("Non-DH key exchange: " + cipher[0]) def test_ecdh_curve(self): # server secp384r1, client auto @@ -4136,8 +4178,9 @@ def cb_raising(ssl_sock, server_name, initial_context): chatty=False, sni_name='supermessage') - self.assertEqual(cm.exception.reason, - 'SSLV3_ALERT_HANDSHAKE_FAILURE') + # Allow for flexible libssl error messages. 
+ regex = "(SSLV3_ALERT_HANDSHAKE_FAILURE|NO_PRIVATE_VALUE)" + self.assertRegex(regex, cm.exception.reason) self.assertEqual(catch.unraisable.exc_type, ZeroDivisionError) def test_sni_callback_wrong_return_type(self): diff --git a/Misc/NEWS.d/next/Tests/2024-03-21-11-32-29.gh-issue-116333.F-9Ram.rst b/Misc/NEWS.d/next/Tests/2024-03-21-11-32-29.gh-issue-116333.F-9Ram.rst new file mode 100644 index 00000000000000..3fdb6bb3bd7af7 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-03-21-11-32-29.gh-issue-116333.F-9Ram.rst @@ -0,0 +1,3 @@ +Tests of TLS related things (error codes, etc) were updated to be more +lenient about specific error message strings and behaviors as seen in the +BoringSSL and AWS-LC forks of OpenSSL. From 570a82d46abfebb9976961113fb0f8bb400ad182 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 21 Mar 2024 12:37:41 -0700 Subject: [PATCH 09/39] gh-117045: Add code object to function version cache (#117028) Changes to the function version cache: - In addition to the function object, also store the code object, and allow the latter to be retrieved even if the function has been evicted. - Stop assigning new function versions after a critical attribute (e.g. `__code__`) has been modified; the version is permanently reset to zero in this case. - Changes to `__annotations__` are no longer considered critical. (This fixes gh-109998.) Changes to the Tier 2 optimization machinery: - If we cannot map a function version to a function, but it is still mapped to a code object, we continue projecting the trace. The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL, a function object, or a code object with the lowest bit set. This allows us to trace through code that calls an ephemeral function, i.e., a function that may not be alive when we are constructing the executor, e.g. a generator expression or certain nested functions. 
We will lose globals removal inside such functions, but we can still do other peephole operations (and even possibly [call inlining](https://github.com/python/cpython/pull/116290), if we decide to do it), which only need the code object. As before, if we cannot retrieve the code object from the cache, we stop projecting. --- Include/internal/pycore_frame.h | 2 +- Include/internal/pycore_function.h | 15 ++- Objects/codeobject.c | 1 + Objects/funcobject.c | 143 ++++++++++++++++++----------- Python/optimizer.c | 62 +++++++++---- Python/optimizer_analysis.c | 30 ++++-- Python/optimizer_bytecodes.c | 27 ++++-- Python/optimizer_cases.c.h | 25 +++-- 8 files changed, 209 insertions(+), 96 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 0f9e7333cf1e1c..74d9e4cac72c0e 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -55,7 +55,7 @@ enum _frameowner { }; typedef struct _PyInterpreterFrame { - PyObject *f_executable; /* Strong reference */ + PyObject *f_executable; /* Strong reference (code object or None) */ struct _PyInterpreterFrame *previous; PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */ PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */ diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index dad6a89af77dec..24fbb3ddbee602 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -17,20 +17,27 @@ extern PyObject* _PyFunction_Vectorcall( #define FUNC_MAX_WATCHERS 8 #define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ + +struct _func_version_cache_item { + PyFunctionObject *func; + PyObject *code; +}; + struct _py_func_state { uint32_t next_version; - // Borrowed references to function objects whose + // Borrowed references to function and code objects whose // func_version % FUNC_VERSION_CACHE_SIZE // once was equal to the index in the table. 
- // They are cleared when the function is deallocated. - PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE]; + // They are cleared when the function or code object is deallocated. + struct _func_version_cache_item func_version_cache[FUNC_VERSION_CACHE_SIZE]; }; extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr); extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); -PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version); +void _PyFunction_ClearCodeByVersion(uint32_t version); +PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code); extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3df733eb4ee578..bdde12d77caf07 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1710,6 +1710,7 @@ code_dealloc(PyCodeObject *co) } Py_SET_REFCNT(co, 0); + _PyFunction_ClearCodeByVersion(co->co_version); if (co->co_extra != NULL) { PyInterpreterState *interp = _PyInterpreterState_GET(); _PyCodeObjectExtra *co_extra = co->co_extra; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index a506166916de48..a3c0800e7891d3 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -218,43 +218,61 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname } /* -Function versions ------------------ +(This is purely internal documentation. There are no public APIs here.) -Function versions are used to detect when a function object has been -updated, invalidating inline cache data used by the `CALL` bytecode -(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations). 
+Function (and code) versions +---------------------------- -They are also used by the Tier 2 superblock creation code to find -the function being called (and from there the code object). +The Tier 1 specializer generates CALL variants that can be invalidated +by changes to critical function attributes: -How does a function's `func_version` field get initialized? +- __code__ +- __defaults__ +- __kwdefaults__ +- __closure__ -- `PyFunction_New` and friends initialize it to 0. -- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`. -- It is reset to 0 when various attributes like `__code__` are set. -- A new version is allocated by `_PyFunction_GetVersionForCurrentState` - when the specializer needs a version and the version is 0. +For this purpose function objects have a 32-bit func_version member +that the specializer writes to the specialized instruction's inline +cache and which is checked by a guard on the specialized instructions. -The latter allocates versions using a counter in the interpreter state, -`interp->func_state.next_version`. -When the counter wraps around to 0, no more versions are allocated. -There is one other special case: functions with a non-standard -`vectorcall` field are not given a version. +The MAKE_FUNCTION bytecode sets func_version from the code object's +co_version field. The latter is initialized from a counter in the +interpreter state (interp->func_state.next_version) and never changes. +When this counter overflows, it remains zero and the specializer loses +the ability to specialize calls to new functions. -When the function version is 0, the `CALL` bytecode is not specialized. +The func_version is reset to zero when any of the critical attributes +is modified; after this point the specializer will no longer specialize +calls to this function, and the guard will always fail. 
-Code object versions --------------------- +The function and code version cache +----------------------------------- -So where to code objects get their `co_version`? -They share the same counter, `interp->func_state.next_version`. +The Tier 2 optimizer now has a problem, since it needs to find the +function and code objects given only the version number from the inline +cache. Our solution is to maintain a cache mapping version numbers to +function and code objects. To limit the cache size we could hash +the version number, but for now we simply use it modulo the table size. + +There are some corner cases (e.g. generator expressions) where we will +be unable to find the function object in the cache but we can still +find the code object. For this reason the cache stores both the +function object and the code object. + +The cache doesn't contain strong references; cache entries are +invalidated whenever the function or code object is deallocated. + +Invariants +---------- + +These should hold at any time except when one of the cache-mutating +functions is running. + +- For any slot s at index i: + - s->func == NULL or s->func->func_version % FUNC_VERSION_CACHE_SIZE == i + - s->code == NULL or s->code->co_version % FUNC_VERSION_CACHE_SIZE == i + if s->func != NULL, then s->func->func_code == s->code -Code objects get a new `co_version` allocated from this counter upon -creation. Since code objects are nominally immutable, `co_version` can -not be invalidated. The only way it can be 0 is when 2**32 or more -code objects have been created during the process's lifetime. -(The counter isn't reset by `fork()`, extending the lifetime.) 
*/ void @@ -262,28 +280,61 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) { PyInterpreterState *interp = _PyInterpreterState_GET(); if (func->func_version != 0) { - PyFunctionObject **slot = + struct _func_version_cache_item *slot = interp->func_state.func_version_cache + (func->func_version % FUNC_VERSION_CACHE_SIZE); - if (*slot == func) { - *slot = NULL; + if (slot->func == func) { + slot->func = NULL; + // Leave slot->code alone, there may be use for it. } } func->func_version = version; if (version != 0) { - interp->func_state.func_version_cache[ - version % FUNC_VERSION_CACHE_SIZE] = func; + struct _func_version_cache_item *slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + slot->func = func; + slot->code = func->func_code; + } +} + +void +_PyFunction_ClearCodeByVersion(uint32_t version) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _func_version_cache_item *slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + if (slot->code) { + assert(PyCode_Check(slot->code)); + PyCodeObject *code = (PyCodeObject *)slot->code; + if (code->co_version == version) { + slot->code = NULL; + slot->func = NULL; + } } } PyFunctionObject * -_PyFunction_LookupByVersion(uint32_t version) +_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code) { PyInterpreterState *interp = _PyInterpreterState_GET(); - PyFunctionObject *func = interp->func_state.func_version_cache[ - version % FUNC_VERSION_CACHE_SIZE]; - if (func != NULL && func->func_version == version) { - return func; + struct _func_version_cache_item *slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + if (slot->code) { + assert(PyCode_Check(slot->code)); + PyCodeObject *code = (PyCodeObject *)slot->code; + if (code->co_version == version) { + *p_code = slot->code; + } + } + else { + *p_code = NULL; + } + if (slot->func && slot->func->func_version == version) { + 
assert(slot->func->func_code == slot->code); + return slot->func; } return NULL; } @@ -291,19 +342,7 @@ _PyFunction_LookupByVersion(uint32_t version) uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func) { - if (func->func_version != 0) { - return func->func_version; - } - if (func->vectorcall != _PyFunction_Vectorcall) { - return 0; - } - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->func_state.next_version == 0) { - return 0; - } - uint32_t v = interp->func_state.next_version++; - _PyFunction_SetVersion(func, v); - return v; + return func->func_version; } PyObject * @@ -507,7 +546,6 @@ PyFunction_SetAnnotations(PyObject *op, PyObject *annotations) "non-dict annotations"); return -1; } - _PyFunction_SetVersion((PyFunctionObject *)op, 0); Py_XSETREF(((PyFunctionObject *)op)->func_annotations, annotations); return 0; } @@ -731,7 +769,6 @@ func_set_annotations(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(igno "__annotations__ must be set to a dict object"); return -1; } - _PyFunction_SetVersion(op, 0); Py_XSETREF(op->func_annotations, Py_XNewRef(value)); return 0; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 4a3cd46ce80a26..177ad343618c37 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -211,7 +211,7 @@ _PyOptimizer_Optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **executor_ptr) { - PyCodeObject *code = (PyCodeObject *)frame->f_executable; + PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); PyInterpreterState *interp = _PyInterpreterState_GET(); if (!has_space_for_executor(code, start)) { @@ -479,8 +479,9 @@ BRANCH_TO_GUARD[4][2] = { ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \ goto done; \ } \ - assert(func->func_code == (PyObject *)code); \ + assert(func == NULL || func->func_code == (PyObject *)code); \ trace_stack[trace_stack_depth].func = func; \ + trace_stack[trace_stack_depth].code = code; \ 
trace_stack[trace_stack_depth].instr = instr; \ trace_stack_depth++; #define TRACE_STACK_POP() \ @@ -489,7 +490,8 @@ BRANCH_TO_GUARD[4][2] = { } \ trace_stack_depth--; \ func = trace_stack[trace_stack_depth].func; \ - code = (PyCodeObject *)trace_stack[trace_stack_depth].func->func_code; \ + code = trace_stack[trace_stack_depth].code; \ + assert(func == NULL || func->func_code == (PyObject *)code); \ instr = trace_stack[trace_stack_depth].instr; /* Returns 1 on success, @@ -505,7 +507,7 @@ translate_bytecode_to_trace( _PyBloomFilter *dependencies) { bool progress_needed = true; - PyCodeObject *code = (PyCodeObject *)frame->f_executable; + PyCodeObject *code = _PyFrame_GetCode(frame); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; assert(PyFunction_Check(func)); PyCodeObject *initial_code = code; @@ -515,6 +517,7 @@ translate_bytecode_to_trace( int max_length = buffer_size; struct { PyFunctionObject *func; + PyCodeObject *code; _Py_CODEUNIT *instr; } trace_stack[TRACE_STACK_SIZE]; int trace_stack_depth = 0; @@ -719,9 +722,19 @@ translate_bytecode_to_trace( if (uop == _POP_FRAME) { TRACE_STACK_POP(); - /* Set the operand to the function object returned to, - * to assist optimization passes */ - ADD_TO_TRACE(uop, oparg, (uintptr_t)func, target); + /* Set the operand to the function or code object returned to, + * to assist optimization passes. (See _PUSH_FRAME below.) 
+ */ + if (func != NULL) { + operand = (uintptr_t)func; + } + else if (code != NULL) { + operand = (uintptr_t)code | 1; + } + else { + operand = 0; + } + ADD_TO_TRACE(uop, oparg, operand, target); DPRINTF(2, "Returning to %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -738,10 +751,12 @@ translate_bytecode_to_trace( // Add one to account for the actual opcode/oparg pair: + 1; uint32_t func_version = read_u32(&instr[func_version_offset].cache); - PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version); - DPRINTF(2, "Function: version=%#x; object=%p\n", (int)func_version, new_func); - if (new_func != NULL) { - PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(new_func); + PyCodeObject *new_code = NULL; + PyFunctionObject *new_func = + _PyFunction_LookupByVersion(func_version, (PyObject **) &new_code); + DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n", + (int)func_version, new_func, new_code); + if (new_code != NULL) { if (new_code == code) { // Recursive call, bail (we could be here forever). DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n", @@ -766,9 +781,22 @@ translate_bytecode_to_trace( instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); _Py_BloomFilter_Add(dependencies, new_code); - /* Set the operand to the callee's function object, - * to assist optimization passes */ - ADD_TO_TRACE(uop, oparg, (uintptr_t)new_func, target); + /* Set the operand to the callee's function or code object, + * to assist optimization passes. + * We prefer setting it to the function (for remove_globals()) + * but if that's not available but the code is available, + * use the code, setting the low bit so the optimizer knows. 
+ */ + if (new_func != NULL) { + operand = (uintptr_t)new_func; + } + else if (new_code != NULL) { + operand = (uintptr_t)new_code | 1; + } + else { + operand = 0; + } + ADD_TO_TRACE(uop, oparg, operand, target); code = new_code; func = new_func; instr = _PyCode_CODE(code); @@ -780,8 +808,8 @@ translate_bytecode_to_trace( 2 * INSTR_IP(instr, code)); goto top; } - DPRINTF(2, "Bail, new_func == NULL\n"); - ADD_TO_TRACE(uop, oparg, operand, target); + DPRINTF(2, "Bail, new_code == NULL\n"); + ADD_TO_TRACE(uop, oparg, 0, target); ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } @@ -1116,7 +1144,7 @@ counter_optimize( int Py_UNUSED(curr_stackentries) ) { - PyCodeObject *code = (PyCodeObject *)frame->f_executable; + PyCodeObject *code = _PyFrame_GetCode(frame); int oparg = instr->op.arg; while (instr->op.code == EXTENDED_ARG) { instr++; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6c460c5359d71e..95924a57cfdaf4 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -228,7 +228,12 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins_watched <<= 1; globals_watched <<= 1; function_checked <<= 1; - PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; + uint64_t operand = buffer[pc].operand; + if (operand == 0 || (operand & 1)) { + // It's either a code object or NULL, so bail + return 1; + } + PyFunctionObject *func = (PyFunctionObject *)operand; if (func == NULL) { return 1; } @@ -251,7 +256,15 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins_watched >>= 1; globals_watched >>= 1; function_checked >>= 1; - PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; + uint64_t operand = buffer[pc].operand; + if (operand == 0 || (operand & 1)) { + // It's either a code object or NULL, so bail + return 1; + } + PyFunctionObject *func = (PyFunctionObject *)operand; + if (func == NULL) { + return 1; + } assert(PyFunction_Check(func)); 
function_version = func->func_version; globals = func->func_globals; @@ -522,7 +535,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) static void peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) { - PyCodeObject *co = (PyCodeObject *)frame->f_executable; + PyCodeObject *co = _PyFrame_GetCode(frame); for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch(opcode) { @@ -545,11 +558,16 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s case _PUSH_FRAME: case _POP_FRAME: { - PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; - if (func == NULL) { + uint64_t operand = buffer[pc].operand; + if (operand & 1) { + co = (PyCodeObject *)(operand & ~1); + assert(PyCode_Check(co)); + } + else if (operand == 0) { co = NULL; } else { + PyFunctionObject *func = (PyFunctionObject *)operand; assert(PyFunction_Check(func)); co = (PyCodeObject *)func->func_code; } @@ -587,7 +605,7 @@ _Py_uop_analyze_and_optimize( peephole_opt(frame, buffer, buffer_size); err = optimize_uops( - (PyCodeObject *)frame->f_executable, buffer, + _PyFrame_GetCode(frame), buffer, buffer_size, curr_stacklen, dependencies); if (err == 0) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index a1ef644e4621e2..e761b1b3433f04 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -543,14 +543,25 @@ dummy_func(void) { (void)callable; - PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; - DPRINTF(3, "func: %p ", func); - if (func == NULL) { - DPRINTF(3, "\n"); - DPRINTF(1, "Missing function\n"); - goto done; - } - PyCodeObject *co = (PyCodeObject *)func->func_code; + PyCodeObject *co = NULL; + assert((this_instr + 2)->opcode == _PUSH_FRAME); + uintptr_t push_operand = (this_instr + 2)->operand; + if (push_operand & 1) { + co = (PyCodeObject *)(push_operand & ~1); + DPRINTF(3, "code=%p ", co); + assert(PyCode_Check(co)); 
+ } + else { + PyFunctionObject *func = (PyFunctionObject *)push_operand; + DPRINTF(3, "func=%p ", func); + if (func == NULL) { + DPRINTF(3, "\n"); + DPRINTF(1, "Missing function\n"); + goto done; + } + co = (PyCodeObject *)func->func_code; + DPRINTF(3, "code=%p ", co); + } assert(self_or_null != NULL); assert(args != NULL); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index a0ecf58905f287..2908a26c20973e 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1596,14 +1596,25 @@ callable = stack_pointer[-2 - oparg]; int argcount = oparg; (void)callable; - PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; - DPRINTF(3, "func: %p ", func); - if (func == NULL) { - DPRINTF(3, "\n"); - DPRINTF(1, "Missing function\n"); - goto done; + PyCodeObject *co = NULL; + assert((this_instr + 2)->opcode == _PUSH_FRAME); + uintptr_t push_operand = (this_instr + 2)->operand; + if (push_operand & 1) { + co = (PyCodeObject *)(push_operand & ~1); + DPRINTF(3, "code=%p ", co); + assert(PyCode_Check(co)); + } + else { + PyFunctionObject *func = (PyFunctionObject *)push_operand; + DPRINTF(3, "func=%p ", func); + if (func == NULL) { + DPRINTF(3, "\n"); + DPRINTF(1, "Missing function\n"); + goto done; + } + co = (PyCodeObject *)func->func_code; + DPRINTF(3, "code=%p ", co); } - PyCodeObject *co = (PyCodeObject *)func->func_code; assert(self_or_null != NULL); assert(args != NULL); if (sym_is_not_null(self_or_null)) { From 0907871d43bffb613cbd560224e1a9db13d06c06 Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Thu, 21 Mar 2024 15:47:09 -0400 Subject: [PATCH 10/39] docs: fix over-linking in dataclasses.rst (#117005) --- Doc/library/dataclasses.rst | 158 ++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 78 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index c612c138fc6ea8..61b2263339da71 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ 
-1,5 +1,5 @@ -:mod:`dataclasses` --- Data Classes -=================================== +:mod:`!dataclasses` --- Data Classes +==================================== .. module:: dataclasses :synopsis: Generate special methods on user-defined classes. @@ -31,7 +31,7 @@ using :pep:`526` type annotations. For example, this code:: def total_cost(self) -> float: return self.unit_price * self.quantity_on_hand -will add, among other things, a :meth:`~object.__init__` that looks like:: +will add, among other things, a :meth:`!__init__` that looks like:: def __init__(self, name: str, unit_price: float, quantity_on_hand: int = 0): self.name = name @@ -49,26 +49,26 @@ Module contents .. decorator:: dataclass(*, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, kw_only=False, slots=False, weakref_slot=False) This function is a :term:`decorator` that is used to add generated - :term:`special method`\s to classes, as described below. + :term:`special methods <special method>` to classes, as described below. - The :func:`dataclass` decorator examines the class to find + The ``@dataclass`` decorator examines the class to find ``field``\s. A ``field`` is defined as a class variable that has a :term:`type annotation <variable annotation>`. With two - exceptions described below, nothing in :func:`dataclass` + exceptions described below, nothing in ``@dataclass`` examines the type specified in the variable annotation. The order of the fields in all of the generated methods is the order in which they appear in the class definition. - The :func:`dataclass` decorator will add various "dunder" methods to + The ``@dataclass`` decorator will add various "dunder" methods to the class, described below. If any of the added methods already exist in the class, the behavior depends on the parameter, as documented below. The decorator returns the same class that it is called on; no new class is created.
- If :func:`dataclass` is used just as a simple decorator with no parameters, + If ``@dataclass`` is used just as a simple decorator with no parameters, it acts as if it has the default values documented in this - signature. That is, these three uses of :func:`dataclass` are + signature. That is, these three uses of ``@dataclass`` are equivalent:: @dataclass @@ -84,12 +84,12 @@ Module contents class C: ... - The parameters to :func:`dataclass` are: + The parameters to ``@dataclass`` are: - ``init``: If true (the default), a :meth:`~object.__init__` method will be generated. - If the class already defines :meth:`~object.__init__`, this parameter is + If the class already defines :meth:`!__init__`, this parameter is ignored. - ``repr``: If true (the default), a :meth:`~object.__repr__` method will be @@ -99,7 +99,7 @@ Module contents are not included. For example: ``InventoryItem(name='widget', unit_price=3.0, quantity_on_hand=10)``. - If the class already defines :meth:`~object.__repr__`, this parameter is + If the class already defines :meth:`!__repr__`, this parameter is ignored. - ``eq``: If true (the default), an :meth:`~object.__eq__` method will be @@ -107,7 +107,7 @@ Module contents of its fields, in order. Both instances in the comparison must be of the identical type. - If the class already defines :meth:`~object.__eq__`, this parameter is + If the class already defines :meth:`!__eq__`, this parameter is ignored. - ``order``: If true (the default is ``False``), :meth:`~object.__lt__`, @@ -117,43 +117,43 @@ Module contents identical type. If ``order`` is true and ``eq`` is false, a :exc:`ValueError` is raised. - If the class already defines any of :meth:`~object.__lt__`, - :meth:`~object.__le__`, :meth:`~object.__gt__`, or :meth:`~object.__ge__`, then + If the class already defines any of :meth:`!__lt__`, + :meth:`!__le__`, :meth:`!__gt__`, or :meth:`!__ge__`, then :exc:`TypeError` is raised. 
- ``unsafe_hash``: If ``False`` (the default), a :meth:`~object.__hash__` method is generated according to how ``eq`` and ``frozen`` are set. - :meth:`~object.__hash__` is used by built-in :meth:`hash()`, and when objects are + :meth:`!__hash__` is used by built-in :meth:`hash()`, and when objects are added to hashed collections such as dictionaries and sets. Having a - :meth:`~object.__hash__` implies that instances of the class are immutable. + :meth:`!__hash__` implies that instances of the class are immutable. Mutability is a complicated property that depends on the programmer's - intent, the existence and behavior of :meth:`~object.__eq__`, and the values of - the ``eq`` and ``frozen`` flags in the :func:`dataclass` decorator. + intent, the existence and behavior of :meth:`!__eq__`, and the values of + the ``eq`` and ``frozen`` flags in the ``@dataclass`` decorator. - By default, :func:`dataclass` will not implicitly add a :meth:`~object.__hash__` + By default, ``@dataclass`` will not implicitly add a :meth:`~object.__hash__` method unless it is safe to do so. Neither will it add or change an - existing explicitly defined :meth:`~object.__hash__` method. Setting the class + existing explicitly defined :meth:`!__hash__` method. Setting the class attribute ``__hash__ = None`` has a specific meaning to Python, as - described in the :meth:`~object.__hash__` documentation. + described in the :meth:`!__hash__` documentation. - If :meth:`~object.__hash__` is not explicitly defined, or if it is set to ``None``, - then :func:`dataclass` *may* add an implicit :meth:`~object.__hash__` method. - Although not recommended, you can force :func:`dataclass` to create a - :meth:`~object.__hash__` method with ``unsafe_hash=True``. This might be the case + If :meth:`!__hash__` is not explicitly defined, or if it is set to ``None``, + then ``@dataclass`` *may* add an implicit :meth:`!__hash__` method. 
+ Although not recommended, you can force ``@dataclass`` to create a + :meth:`!__hash__` method with ``unsafe_hash=True``. This might be the case if your class is logically immutable but can still be mutated. This is a specialized use case and should be considered carefully. - Here are the rules governing implicit creation of a :meth:`~object.__hash__` - method. Note that you cannot both have an explicit :meth:`~object.__hash__` + Here are the rules governing implicit creation of a :meth:`!__hash__` + method. Note that you cannot both have an explicit :meth:`!__hash__` method in your dataclass and set ``unsafe_hash=True``; this will result in a :exc:`TypeError`. - If ``eq`` and ``frozen`` are both true, by default :func:`dataclass` will - generate a :meth:`~object.__hash__` method for you. If ``eq`` is true and - ``frozen`` is false, :meth:`~object.__hash__` will be set to ``None``, marking it + If ``eq`` and ``frozen`` are both true, by default ``@dataclass`` will + generate a :meth:`!__hash__` method for you. If ``eq`` is true and + ``frozen`` is false, :meth:`!__hash__` will be set to ``None``, marking it unhashable (which it is, since it is mutable). If ``eq`` is false, - :meth:`~object.__hash__` will be left untouched meaning the :meth:`~object.__hash__` + :meth:`!__hash__` will be left untouched meaning the :meth:`!__hash__` method of the superclass will be used (if the superclass is :class:`object`, this means it will fall back to id-based hashing). @@ -165,7 +165,7 @@ Module contents - ``match_args``: If true (the default is ``True``), the ``__match_args__`` tuple will be created from the list of parameters to the generated :meth:`~object.__init__` method (even if - :meth:`~object.__init__` is not generated, see above). If false, or if + :meth:`!__init__` is not generated, see above). If false, or if ``__match_args__`` is already defined in the class, then ``__match_args__`` will not be generated. 
@@ -175,7 +175,7 @@ Module contents fields will be marked as keyword-only. If a field is marked as keyword-only, then the only effect is that the :meth:`~object.__init__` parameter generated from a keyword-only field must be specified - with a keyword when :meth:`~object.__init__` is called. There is no + with a keyword when :meth:`!__init__` is called. There is no effect on any other aspect of dataclasses. See the :term:`parameter` glossary entry for details. Also see the :const:`KW_ONLY` section. @@ -184,7 +184,7 @@ Module contents - ``slots``: If true (the default is ``False``), :attr:`~object.__slots__` attribute will be generated and new class will be returned instead of the original one. - If :attr:`~object.__slots__` is already defined in the class, then :exc:`TypeError` + If :attr:`!__slots__` is already defined in the class, then :exc:`TypeError` is raised. .. versionadded:: 3.10 @@ -229,7 +229,7 @@ Module contents required. There are, however, some dataclass features that require additional per-field information. To satisfy this need for additional information, you can replace the default field value - with a call to the provided :func:`field` function. For example:: + with a call to the provided :func:`!field` function. For example:: @dataclass class C: @@ -243,10 +243,10 @@ Module contents used because ``None`` is a valid value for some parameters with a distinct meaning. No code should directly use the :const:`MISSING` value. - The parameters to :func:`field` are: + The parameters to :func:`!field` are: - ``default``: If provided, this will be the default value for this - field. This is needed because the :meth:`field` call itself + field. This is needed because the :func:`!field` call itself replaces the normal position of the default value. - ``default_factory``: If provided, it must be a zero-argument @@ -293,10 +293,10 @@ Module contents .. 
versionadded:: 3.10 If the default value of a field is specified by a call to - :func:`field()`, then the class attribute for this field will be + :func:`!field`, then the class attribute for this field will be replaced by the specified ``default`` value. If no ``default`` is provided, then the class attribute will be deleted. The intent is - that after the :func:`dataclass` decorator runs, the class + that after the :func:`@dataclass <dataclass>` decorator runs, the class attributes will all contain the default values for the fields, just as if the default value itself were specified. For example, after:: @@ -314,10 +314,10 @@ Module contents .. class:: Field - :class:`Field` objects describe each defined field. These objects + :class:`!Field` objects describe each defined field. These objects are created internally, and are returned by the :func:`fields` module-level method (see below). Users should never instantiate a - :class:`Field` object directly. Its documented attributes are: + :class:`!Field` object directly. Its documented attributes are: - ``name``: The name of the field. - ``type``: The type of the field. @@ -343,7 +343,7 @@ Module contents lists, and tuples are recursed into. Other objects are copied with :func:`copy.deepcopy`. - Example of using :func:`asdict` on nested dataclasses:: + Example of using :func:`!asdict` on nested dataclasses:: @dataclass class Point: @@ -364,7 +364,7 @@ Module contents dict((field.name, getattr(obj, field.name)) for field in fields(obj)) - :func:`asdict` raises :exc:`TypeError` if ``obj`` is not a dataclass + :func:`!asdict` raises :exc:`TypeError` if ``obj`` is not a dataclass instance. .. function:: astuple(obj, *, tuple_factory=tuple) @@ -384,7 +384,7 @@ Module contents tuple(getattr(obj, field.name) for field in dataclasses.fields(obj)) - :func:`astuple` raises :exc:`TypeError` if ``obj`` is not a dataclass + :func:`!astuple` raises :exc:`TypeError` if ``obj`` is not a dataclass instance. ..
function:: make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, kw_only=False, slots=False, weakref_slot=False, module=None) @@ -397,7 +397,7 @@ Module contents ``typing.Any`` is used for ``type``. The values of ``init``, ``repr``, ``eq``, ``order``, ``unsafe_hash``, ``frozen``, ``match_args``, ``kw_only``, ``slots``, and ``weakref_slot`` have - the same meaning as they do in :func:`dataclass`. + the same meaning as they do in :func:`@dataclass <dataclass>`. If ``module`` is defined, the ``__module__`` attribute of the dataclass is set to that value. @@ -405,7 +405,7 @@ Module contents This function is not strictly required, because any Python mechanism for creating a new class with ``__annotations__`` can - then apply the :func:`dataclass` function to convert that class to + then apply the ``@dataclass`` function to convert that class to a dataclass. This function is provided as a convenience. For example:: @@ -438,15 +438,15 @@ Module contents :meth:`__post_init__`, if present, is also called. Init-only variables without default values, if any exist, must be - specified on the call to :func:`replace` so that they can be passed to - :meth:`~object.__init__` and :meth:`__post_init__`. + specified on the call to :func:`!replace` so that they can be passed to + :meth:`!__init__` and :meth:`__post_init__`. It is an error for ``changes`` to contain any fields that are defined as having ``init=False``. A :exc:`ValueError` will be raised in this case. Be forewarned about how ``init=False`` fields work during a call to - :func:`replace`. They are not copied from the source object, but + :func:`!replace`. They are not copied from the source object, but rather are initialized in :meth:`__post_init__`, if they're initialized at all. It is expected that ``init=False`` fields will be rarely and judiciously used. If they are used, it might be wise @@ -475,11 +475,11 @@ Module contents ..
data:: KW_ONLY A sentinel value used as a type annotation. Any fields after a - pseudo-field with the type of :const:`KW_ONLY` are marked as + pseudo-field with the type of :const:`!KW_ONLY` are marked as keyword-only fields. Note that a pseudo-field of type - :const:`KW_ONLY` is otherwise completely ignored. This includes the + :const:`!KW_ONLY` is otherwise completely ignored. This includes the name of such a field. By convention, a name of ``_`` is used for a - :const:`KW_ONLY` field. Keyword-only fields signify + :const:`!KW_ONLY` field. Keyword-only fields signify :meth:`~object.__init__` parameters that must be specified as keywords when the class is instantiated. @@ -495,7 +495,7 @@ Module contents p = Point(0, y=1.5, z=2.0) In a single dataclass, it is an error to specify more than one - field whose type is :const:`KW_ONLY`. + field whose type is :const:`!KW_ONLY`. .. versionadded:: 3.10 @@ -515,9 +515,9 @@ Post-init processing When defined on the class, it will be called by the generated :meth:`~object.__init__`, normally as ``self.__post_init__()``. However, if any ``InitVar`` fields are defined, they will also be - passed to :meth:`__post_init__` in the order they were defined in the - class. If no :meth:`~object.__init__` method is generated, then - :meth:`__post_init__` will not automatically be called. + passed to :meth:`!__post_init__` in the order they were defined in the + class. If no :meth:`!__init__` method is generated, then + :meth:`!__post_init__` will not automatically be called. Among other uses, this allows for initializing field values that depend on one or more other fields. For example:: @@ -531,8 +531,8 @@ Post-init processing def __post_init__(self): self.c = self.a + self.b -The :meth:`~object.__init__` method generated by :func:`dataclass` does not call base -class :meth:`~object.__init__` methods. 
If the base class has an :meth:`~object.__init__` method +The :meth:`~object.__init__` method generated by :func:`@dataclass <dataclass>` does not call base +class :meth:`!__init__` methods. If the base class has an :meth:`!__init__` method that has to be called, it is common to call this method in a :meth:`__post_init__` method:: @@ -548,18 +548,18 @@ that has to be called, it is common to call this method in a def __post_init__(self): super().__init__(self.side, self.side) -Note, however, that in general the dataclass-generated :meth:`~object.__init__` methods +Note, however, that in general the dataclass-generated :meth:`!__init__` methods don't need to be called, since the derived dataclass will take care of initializing all fields of any base class that is a dataclass itself. See the section below on init-only variables for ways to pass -parameters to :meth:`__post_init__`. Also see the warning about how +parameters to :meth:`!__post_init__`. Also see the warning about how :func:`replace` handles ``init=False`` fields. Class variables --------------- -One of the few places where :func:`dataclass` actually inspects the type +One of the few places where :func:`@dataclass <dataclass>` actually inspects the type of a field is to determine if a field is a class variable as defined in :pep:`526`. It does this by checking if the type of the field is ``typing.ClassVar``. If a field is a ``ClassVar``, it is excluded @@ -570,7 +570,7 @@ module-level :func:`fields` function. Init-only variables ------------------- -Another place where :func:`dataclass` inspects a type annotation is to +Another place where :func:`@dataclass <dataclass>` inspects a type annotation is to determine if a field is an init-only variable. It does this by seeing if the type of a field is of type ``dataclasses.InitVar``. If a field is an ``InitVar``, it is considered a pseudo-field called an init-only @@ -602,19 +602,19 @@ Frozen instances ---------------- It is not possible to create truly immutable Python objects.
However, -by passing ``frozen=True`` to the :meth:`dataclass` decorator you can +by passing ``frozen=True`` to the :func:`@dataclass <dataclass>` decorator you can emulate immutability. In that case, dataclasses will add :meth:`~object.__setattr__` and :meth:`~object.__delattr__` methods to the class. These methods will raise a :exc:`FrozenInstanceError` when invoked. There is a tiny performance penalty when using ``frozen=True``: :meth:`~object.__init__` cannot use simple assignment to initialize fields, and -must use :meth:`!object.__setattr__`. +must use :meth:`!__setattr__`. Inheritance ----------- -When the dataclass is being created by the :meth:`dataclass` decorator, +When the dataclass is being created by the :func:`@dataclass <dataclass>` decorator, it looks through all of the class's base classes in reverse MRO (that is, starting at :class:`object`) and, for each dataclass that it finds, adds the fields from that base class to an ordered mapping of fields. @@ -641,8 +641,8 @@ The generated :meth:`~object.__init__` method for ``C`` will look like:: def __init__(self, x: int = 15, y: int = 0, z: int = 10): -Re-ordering of keyword-only parameters in :meth:`~object.__init__` ------------------------------------------------------------------- +Re-ordering of keyword-only parameters in :meth:`!__init__` +----------------------------------------------------------- After the parameters needed for :meth:`~object.__init__` are computed, any keyword-only parameters are moved to come after all regular @@ -665,7 +665,7 @@ fields, and ``Base.x`` and ``D.z`` are regular fields:: z: int = 10 t: int = field(kw_only=True, default=0) -The generated :meth:`~object.__init__` method for ``D`` will look like:: +The generated :meth:`!__init__` method for ``D`` will look like:: def __init__(self, x: Any = 15.0, z: int = 10, *, y: int = 0, w: int = 1, t: int = 0): @@ -674,7 +674,7 @@ the list of fields: parameters derived from regular fields are followed by parameters derived from keyword-only fields.
The relative ordering of keyword-only parameters is maintained in the -re-ordered :meth:`~object.__init__` parameter list. +re-ordered :meth:`!__init__` parameter list. Default factory functions @@ -689,7 +689,7 @@ example, to create a new instance of a list, use:: If a field is excluded from :meth:`~object.__init__` (using ``init=False``) and the field also specifies ``default_factory``, then the default factory function will always be called from the generated -:meth:`~object.__init__` function. This happens because there is no other +:meth:`!__init__` function. This happens because there is no other way to give the field an initial value. Mutable default values @@ -738,7 +738,7 @@ for ``x`` when creating a class instance will share the same copy of ``x``. Because dataclasses just use normal Python class creation they also share this behavior. There is no general way for Data Classes to detect this condition. Instead, the -:func:`dataclass` decorator will raise a :exc:`ValueError` if it +:func:`@dataclass <dataclass>` decorator will raise a :exc:`ValueError` if it detects an unhashable default parameter. The assumption is that if a value is unhashable, it is mutable. This is a partial solution, but it does protect against many common errors. @@ -764,15 +764,17 @@ Descriptor-typed fields Fields that are assigned :ref:`descriptor objects <descriptors>` as their default value have the following special behaviors: -* The value for the field passed to the dataclass's ``__init__`` method is - passed to the descriptor's ``__set__`` method rather than overwriting the +* The value for the field passed to the dataclass's :meth:`~object.__init__` method is + passed to the descriptor's :meth:`~object.__set__` method rather than overwriting the descriptor object.
+ * Similarly, when getting or setting the field, the descriptor's - ``__get__`` or ``__set__`` method is called rather than returning or + :meth:`~object.__get__` or :meth:`!__set__` method is called rather than returning or overwriting the descriptor object. -* To determine whether a field contains a default value, ``dataclasses`` - will call the descriptor's ``__get__`` method using its class access - form (i.e. ``descriptor.__get__(obj=None, type=cls)``. If the + +* To determine whether a field contains a default value, :func:`@dataclass <dataclass>` + will call the descriptor's :meth:`!__get__` method using its class access + form: ``descriptor.__get__(obj=None, type=cls)``. If the descriptor returns a value in this case, it will be used as the field's default. On the other hand, if the descriptor raises :exc:`AttributeError` in this situation, no default value will be From 50f9b0b1e0fb181875751cef951351ed007b6397 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 21 Mar 2024 23:17:09 +0100 Subject: [PATCH 11/39] gh-117061: Fix test_posix.test_sched_setaffinity() on RHEL9 (#117126) On RHEL9, sched_setaffinity(0, []) does not fail.
--- Lib/test/test_posix.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 2706d5eb6d9830..1d22869046fd12 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -1335,12 +1335,21 @@ def test_sched_getaffinity(self): def test_sched_setaffinity(self): mask = posix.sched_getaffinity(0) self.addCleanup(posix.sched_setaffinity, 0, list(mask)) + if len(mask) > 1: # Empty masks are forbidden mask.pop() posix.sched_setaffinity(0, mask) self.assertEqual(posix.sched_getaffinity(0), mask) - self.assertRaises(OSError, posix.sched_setaffinity, 0, []) + + try: + posix.sched_setaffinity(0, []) + # gh-117061: On RHEL9, sched_setaffinity(0, []) does not fail + except OSError: + # sched_setaffinity() manual page documents EINVAL error + # when the mask is empty. + pass + self.assertRaises(ValueError, posix.sched_setaffinity, 0, [-10]) self.assertRaises(ValueError, posix.sched_setaffinity, 0, map(int, "0X")) self.assertRaises(OverflowError, posix.sched_setaffinity, 0, [1<<128]) From 3ec57307e70ee6f42410e844d3399bbd598917ba Mon Sep 17 00:00:00 2001 From: Malcolm Smith Date: Thu, 21 Mar 2024 23:52:29 +0000 Subject: [PATCH 12/39] gh-71052: Add Android build script and instructions (#116426) --- Android/README.md | 64 ++++++ Android/android-env.sh | 87 ++++++++ Android/android.py | 202 ++++++++++++++++++ Include/cpython/pystate.h | 4 + ...4-03-06-17-26-55.gh-issue-71052.vLbu9u.rst | 1 + configure | 29 ++- configure.ac | 35 ++- 7 files changed, 403 insertions(+), 19 deletions(-) create mode 100644 Android/README.md create mode 100644 Android/android-env.sh create mode 100755 Android/android.py create mode 100644 Misc/NEWS.d/next/Build/2024-03-06-17-26-55.gh-issue-71052.vLbu9u.rst diff --git a/Android/README.md b/Android/README.md new file mode 100644 index 00000000000000..5ed186e06e3951 --- /dev/null +++ b/Android/README.md @@ -0,0 +1,64 @@ +# Python for Android + +These instructions are only 
needed if you're planning to compile Python for +Android yourself. Most users should *not* need to do this. If you're looking to +use Python on Android, one of the following tools will provide a much more +approachable user experience: + +* [Briefcase](https://briefcase.readthedocs.io), from the BeeWare project +* [Buildozer](https://buildozer.readthedocs.io), from the Kivy project +* [Chaquopy](https://chaquo.com/chaquopy/) + + +## Prerequisites + +Export the `ANDROID_HOME` environment variable to point at your Android SDK. If +you don't already have the SDK, here's how to install it: + +* Download the "Command line tools" from <https://developer.android.com/studio>. +* Create a directory `android-sdk/cmdline-tools`, and unzip the command line + tools package into it. +* Rename `android-sdk/cmdline-tools/cmdline-tools` to + `android-sdk/cmdline-tools/latest`. +* `export ANDROID_HOME=/path/to/android-sdk` + + +## Building + +Building for Android requires doing a cross-build where you have a "build" +Python to help produce an Android build of CPython. This procedure has been +tested on Linux and macOS. + +The easiest way to do a build is to use the `android.py` script. You can either +have it perform the entire build process from start to finish in one step, or +you can do it in discrete steps that mirror running `configure` and `make` for +each of the two builds of Python you end up producing. + +The discrete steps for building via `android.py` are: + +```sh +./android.py configure-build +./android.py make-build +./android.py configure-host HOST +./android.py make-host HOST +``` + +To see the possible values of HOST, run `./android.py configure-host --help`. + +Or to do it all in a single command, run: + +```sh +./android.py build HOST +``` + +In the end you should have a build Python in `cross-build/build`, and an Android +build in `cross-build/HOST`.
+ +You can use `--` as a separator for any of the `configure`-related commands – +including `build` itself – to pass arguments to the underlying `configure` +call. For example, if you want a pydebug build that also caches the results from +`configure`, you can do: + +```sh +./android.py build HOST -- -C --with-pydebug +``` diff --git a/Android/android-env.sh b/Android/android-env.sh new file mode 100644 index 00000000000000..3ce3e035cfb8fe --- /dev/null +++ b/Android/android-env.sh @@ -0,0 +1,87 @@ +# This script must be sourced with the following variables already set: +: ${ANDROID_HOME:?} # Path to Android SDK +: ${HOST:?} # GNU target triplet + +# You may also override the following: +: ${api_level:=21} # Minimum Android API level the build will run on +: ${PREFIX:-} # Path in which to find required libraries + + +# Print all messages on stderr so they're visible when running within build-wheel. +log() { + echo "$1" >&2 +} + +fail() { + log "$1" + exit 1 +} + +# When moving to a new version of the NDK, carefully review the following: +# +# * https://developer.android.com/ndk/downloads/revision_history +# +# * https://android.googlesource.com/platform/ndk/+/ndk-rXX-release/docs/BuildSystemMaintainers.md +# where XX is the NDK version. Do a diff against the version you're upgrading from, e.g.: +# https://android.googlesource.com/platform/ndk/+/ndk-r25-release..ndk-r26-release/docs/BuildSystemMaintainers.md +ndk_version=26.2.11394342 + +ndk=$ANDROID_HOME/ndk/$ndk_version +if ! [ -e $ndk ]; then + log "Installing NDK: this may take several minutes" + yes | $ANDROID_HOME/cmdline-tools/latest/bin/sdkmanager "ndk;$ndk_version" +fi + +if [ $HOST = "arm-linux-androideabi" ]; then + clang_triplet=armv7a-linux-androideabi +else + clang_triplet=$HOST +fi + +# These variables are based on BuildSystemMaintainers.md above, and +# $ndk/build/cmake/android.toolchain.cmake. 
+toolchain=$(echo $ndk/toolchains/llvm/prebuilt/*) +export AR="$toolchain/bin/llvm-ar" +export AS="$toolchain/bin/llvm-as" +export CC="$toolchain/bin/${clang_triplet}${api_level}-clang" +export CXX="${CC}++" +export LD="$toolchain/bin/ld" +export NM="$toolchain/bin/llvm-nm" +export RANLIB="$toolchain/bin/llvm-ranlib" +export READELF="$toolchain/bin/llvm-readelf" +export STRIP="$toolchain/bin/llvm-strip" + +# The quotes make sure the wildcard in the `toolchain` assignment has been expanded. +for path in "$AR" "$AS" "$CC" "$CXX" "$LD" "$NM" "$RANLIB" "$READELF" "$STRIP"; do + if ! [ -e "$path" ]; then + fail "$path does not exist" + fi +done + +export CFLAGS="" +export LDFLAGS="-Wl,--build-id=sha1 -Wl,--no-rosegment" + +# Many packages get away with omitting -lm on Linux, but Android is stricter. +LDFLAGS="$LDFLAGS -lm" + +# -mstackrealign is included where necessary in the clang launcher scripts which are +# pointed to by $CC, so we don't need to include it here. +if [ $HOST = "arm-linux-androideabi" ]; then + CFLAGS="$CFLAGS -march=armv7-a -mthumb" +fi + +if [ -n "${PREFIX:-}" ]; then + abs_prefix=$(realpath $PREFIX) + CFLAGS="$CFLAGS -I$abs_prefix/include" + LDFLAGS="$LDFLAGS -L$abs_prefix/lib" + + export PKG_CONFIG="pkg-config --define-prefix" + export PKG_CONFIG_LIBDIR="$abs_prefix/lib/pkgconfig" +fi + +# Use the same variable name as conda-build +if [ $(uname) = "Darwin" ]; then + export CPU_COUNT=$(sysctl -n hw.ncpu) +else + export CPU_COUNT=$(nproc) +fi diff --git a/Android/android.py b/Android/android.py new file mode 100755 index 00000000000000..5c57e53c415d2b --- /dev/null +++ b/Android/android.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 + +import argparse +import os +import re +import shutil +import subprocess +import sys +import sysconfig +from os.path import relpath +from pathlib import Path + +SCRIPT_NAME = Path(__file__).name +CHECKOUT = Path(__file__).resolve().parent.parent +CROSS_BUILD_DIR = CHECKOUT / "cross-build" + + +def 
delete_if_exists(path): + if path.exists(): + print(f"Deleting {path} ...") + shutil.rmtree(path) + + +def subdir(name, *, clean=None): + path = CROSS_BUILD_DIR / name + if clean: + delete_if_exists(path) + if not path.exists(): + if clean is None: + sys.exit( + f"{path} does not exist. Create it by running the appropriate " + f"`configure` subcommand of {SCRIPT_NAME}.") + else: + path.mkdir(parents=True) + return path + + +def run(command, *, host=None, **kwargs): + env = os.environ.copy() + if host: + env_script = CHECKOUT / "Android/android-env.sh" + env_output = subprocess.run( + f"set -eu; " + f"HOST={host}; " + f"PREFIX={subdir(host)}/prefix; " + f". {env_script}; " + f"export", + check=True, shell=True, text=True, stdout=subprocess.PIPE + ).stdout + + for line in env_output.splitlines(): + # We don't require every line to match, as there may be some other + # output from installing the NDK. + if match := re.search( + "^(declare -x |export )?(\\w+)=['\"]?(.*?)['\"]?$", line + ): + key, value = match[2], match[3] + if env.get(key) != value: + print(line) + env[key] = value + + if env == os.environ: + raise ValueError(f"Found no variables in {env_script.name} output:\n" + + env_output) + + print(">", " ".join(map(str, command))) + try: + subprocess.run(command, check=True, env=env, **kwargs) + except subprocess.CalledProcessError as e: + sys.exit(e) + + +def build_python_path(): + """The path to the build Python binary.""" + build_dir = subdir("build") + binary = build_dir / "python" + if not binary.is_file(): + binary = binary.with_suffix(".exe") + if not binary.is_file(): + raise FileNotFoundError("Unable to find `python(.exe)` in " + f"{build_dir}") + + return binary + + +def configure_build_python(context): + os.chdir(subdir("build", clean=context.clean)) + + command = [relpath(CHECKOUT / "configure")] + if context.args: + command.extend(context.args) + run(command) + + +def make_build_python(context): + os.chdir(subdir("build")) + run(["make", "-j", 
str(os.cpu_count())]) + + +def unpack_deps(host): + deps_url = "https://github.com/beeware/cpython-android-source-deps/releases/download" + for name_ver in ["bzip2-1.0.8-1", "libffi-3.4.4-2", "openssl-3.0.13-1", + "sqlite-3.45.1-0", "xz-5.4.6-0"]: + filename = f"{name_ver}-{host}.tar.gz" + run(["wget", f"{deps_url}/{name_ver}/{filename}"]) + run(["tar", "-xf", filename]) + os.remove(filename) + + +def configure_host_python(context): + host_dir = subdir(context.host, clean=context.clean) + + prefix_dir = host_dir / "prefix" + if not prefix_dir.exists(): + prefix_dir.mkdir() + os.chdir(prefix_dir) + unpack_deps(context.host) + + build_dir = host_dir / "build" + build_dir.mkdir(exist_ok=True) + os.chdir(build_dir) + + command = [ + # Basic cross-compiling configuration + relpath(CHECKOUT / "configure"), + f"--host={context.host}", + f"--build={sysconfig.get_config_var('BUILD_GNU_TYPE')}", + f"--with-build-python={build_python_path()}", + "--without-ensurepip", + + # Android always uses a shared libpython. + "--enable-shared", + "--without-static-libpython", + + # Dependent libraries. The others are found using pkg-config: see + # android-env.sh. 
+ f"--with-openssl={prefix_dir}", + ] + + if context.args: + command.extend(context.args) + run(command, host=context.host) + + +def make_host_python(context): + host_dir = subdir(context.host) + os.chdir(host_dir / "build") + run(["make", "-j", str(os.cpu_count())], host=context.host) + run(["make", "install", f"prefix={host_dir}/prefix"], host=context.host) + + +def build_all(context): + steps = [configure_build_python, make_build_python, configure_host_python, + make_host_python] + for step in steps: + step(context) + + +def clean_all(context): + delete_if_exists(CROSS_BUILD_DIR) + + +def main(): + parser = argparse.ArgumentParser() + subcommands = parser.add_subparsers(dest="subcommand") + build = subcommands.add_parser("build", help="Build everything") + configure_build = subcommands.add_parser("configure-build", + help="Run `configure` for the " + "build Python") + make_build = subcommands.add_parser("make-build", + help="Run `make` for the build Python") + configure_host = subcommands.add_parser("configure-host", + help="Run `configure` for Android") + make_host = subcommands.add_parser("make-host", + help="Run `make` for Android") + clean = subcommands.add_parser("clean", help="Delete files and directories " + "created by this script") + for subcommand in build, configure_build, configure_host: + subcommand.add_argument( + "--clean", action="store_true", default=False, dest="clean", + help="Delete any relevant directories before building") + for subcommand in build, configure_host, make_host: + subcommand.add_argument( + "host", metavar="HOST", + choices=["aarch64-linux-android", "x86_64-linux-android"], + help="Host triplet: choices=[%(choices)s]") + for subcommand in build, configure_build, configure_host: + subcommand.add_argument("args", nargs="*", + help="Extra arguments to pass to `configure`") + + context = parser.parse_args() + dispatch = {"configure-build": configure_build_python, + "make-build": make_build_python, + "configure-host": 
configure_host_python, + "make-host": make_host_python, + "build": build_all, + "clean": clean_all} + dispatch[context.subcommand](context) + + +if __name__ == "__main__": + main() diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 38d0897ea13161..f1540fde925921 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -211,6 +211,10 @@ struct _ts { # define Py_C_RECURSION_LIMIT 800 #elif defined(_WIN32) # define Py_C_RECURSION_LIMIT 3000 +#elif defined(__ANDROID__) + // On an ARM64 emulator, API level 34 was OK with 10000, but API level 21 + // crashed in test_compiler_recursion_limit. +# define Py_C_RECURSION_LIMIT 3000 #elif defined(_Py_ADDRESS_SANITIZER) # define Py_C_RECURSION_LIMIT 4000 #else diff --git a/Misc/NEWS.d/next/Build/2024-03-06-17-26-55.gh-issue-71052.vLbu9u.rst b/Misc/NEWS.d/next/Build/2024-03-06-17-26-55.gh-issue-71052.vLbu9u.rst new file mode 100644 index 00000000000000..53776c0216f553 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-03-06-17-26-55.gh-issue-71052.vLbu9u.rst @@ -0,0 +1 @@ +Add Android build script and instructions. diff --git a/configure b/configure index 229f0d32d322dd..0493d382f48481 100755 --- a/configure +++ b/configure @@ -17595,13 +17595,21 @@ fi if test "$ac_sys_system" = "Linux-android"; then # When these functions are used in an unprivileged process, they crash rather # than returning an error. - privileged_funcs="chroot initgroups setegid seteuid setgid setregid setresgid - setresuid setreuid setuid" - - # These functions are unimplemented and always return an error. 
- unimplemented_funcs="sem_open sem_unlink" + blocked_funcs="chroot initgroups setegid seteuid setgid sethostname + setregid setresgid setresuid setreuid setuid" + + # These functions are unimplemented and always return an error + # (https://android.googlesource.com/platform/system/sepolicy/+/refs/heads/android13-release/public/domain.te#1044) + blocked_funcs="$blocked_funcs sem_open sem_unlink" + + # Before API level 23, when fchmodat is called with the unimplemented flag + # AT_SYMLINK_NOFOLLOW, instead of returning ENOTSUP as it should, it actually + # follows the symlink. + if test "$ANDROID_API_LEVEL" -lt 23; then + blocked_funcs="$blocked_funcs fchmodat" + fi - for name in $privileged_funcs $unimplemented_funcs; do + for name in $blocked_funcs; do as_func_var=`printf "%s\n" "ac_cv_func_$name" | $as_tr_sh` eval "$as_func_var=no" @@ -22156,6 +22164,10 @@ fi done fi +# On Android before API level 23, clock_nanosleep returns the wrong value when +# interrupted by a signal (https://issuetracker.google.com/issues/216495770). +if ! { test "$ac_sys_system" = "Linux-android" && + test "$ANDROID_API_LEVEL" -lt 23; }; then for ac_func in clock_nanosleep do : @@ -22166,7 +22178,7 @@ then : else $as_nop - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for clock_nanosleep in -lrt" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for clock_nanosleep in -lrt" >&5 printf %s "checking for clock_nanosleep in -lrt... 
" >&6; } if test ${ac_cv_lib_rt_clock_nanosleep+y} then : @@ -22204,7 +22216,7 @@ printf "%s\n" "$ac_cv_lib_rt_clock_nanosleep" >&6; } if test "x$ac_cv_lib_rt_clock_nanosleep" = xyes then : - printf "%s\n" "#define HAVE_CLOCK_NANOSLEEP 1" >>confdefs.h + printf "%s\n" "#define HAVE_CLOCK_NANOSLEEP 1" >>confdefs.h fi @@ -22213,6 +22225,7 @@ fi fi done +fi for ac_func in nanosleep diff --git a/configure.ac b/configure.ac index cd17977738482d..cdfafc2d7272f1 100644 --- a/configure.ac +++ b/configure.ac @@ -4934,13 +4934,21 @@ fi if test "$ac_sys_system" = "Linux-android"; then # When these functions are used in an unprivileged process, they crash rather # than returning an error. - privileged_funcs="chroot initgroups setegid seteuid setgid setregid setresgid - setresuid setreuid setuid" - - # These functions are unimplemented and always return an error. - unimplemented_funcs="sem_open sem_unlink" + blocked_funcs="chroot initgroups setegid seteuid setgid sethostname + setregid setresgid setresuid setreuid setuid" + + # These functions are unimplemented and always return an error + # (https://android.googlesource.com/platform/system/sepolicy/+/refs/heads/android13-release/public/domain.te#1044) + blocked_funcs="$blocked_funcs sem_open sem_unlink" + + # Before API level 23, when fchmodat is called with the unimplemented flag + # AT_SYMLINK_NOFOLLOW, instead of returning ENOTSUP as it should, it actually + # follows the symlink. 
+ if test "$ANDROID_API_LEVEL" -lt 23; then + blocked_funcs="$blocked_funcs fchmodat" + fi - for name in $privileged_funcs $unimplemented_funcs; do + for name in $blocked_funcs; do AS_VAR_PUSHDEF([func_var], [ac_cv_func_$name]) AS_VAR_SET([func_var], [no]) AS_VAR_POPDEF([func_var]) @@ -5303,11 +5311,16 @@ then ]) fi -AC_CHECK_FUNCS([clock_nanosleep], [], [ - AC_CHECK_LIB([rt], [clock_nanosleep], [ - AC_DEFINE([HAVE_CLOCK_NANOSLEEP], [1]) - ]) -]) +# On Android before API level 23, clock_nanosleep returns the wrong value when +# interrupted by a signal (https://issuetracker.google.com/issues/216495770). +if ! { test "$ac_sys_system" = "Linux-android" && + test "$ANDROID_API_LEVEL" -lt 23; }; then + AC_CHECK_FUNCS([clock_nanosleep], [], [ + AC_CHECK_LIB([rt], [clock_nanosleep], [ + AC_DEFINE([HAVE_CLOCK_NANOSLEEP], [1]) + ]) + ]) +fi AC_CHECK_FUNCS([nanosleep], [], [ AC_CHECK_LIB([rt], [nanosleep], [ From c4bf58a14f162557038a1535ca22c52b49d81d7b Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Thu, 21 Mar 2024 19:54:50 -0400 Subject: [PATCH 13/39] gh-116745: Remove all internal usage of @LIBPYTHON@ (#116746) Replace with MODULE_LDFLAGS. 
--- Misc/python-config.sh.in | 2 +- Misc/python.pc.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/python-config.sh.in b/Misc/python-config.sh.in index eb02223ddcd2c3..c3c0b34fc1451d 100644 --- a/Misc/python-config.sh.in +++ b/Misc/python-config.sh.in @@ -46,7 +46,7 @@ LIBM="@LIBM@" LIBC="@LIBC@" SYSLIBS="$LIBM $LIBC" ABIFLAGS="@ABIFLAGS@" -LIBS="@LIBPYTHON@ @LIBS@ $SYSLIBS" +LIBS="@MODULE_LDFLAGS@ @LIBS@ $SYSLIBS" LIBS_EMBED="-lpython${VERSION}${ABIFLAGS} @LIBS@ $SYSLIBS" BASECFLAGS="@BASECFLAGS@" LDLIBRARY="@LDLIBRARY@" diff --git a/Misc/python.pc.in b/Misc/python.pc.in index 027dba38585a89..c2c740e82b1fde 100644 --- a/Misc/python.pc.in +++ b/Misc/python.pc.in @@ -9,5 +9,5 @@ Description: Build a C extension for Python Requires: Version: @VERSION@ Libs.private: @LIBS@ -Libs: -L${libdir} @LIBPYTHON@ +Libs: -L${libdir} @MODULE_LDFLAGS@ Cflags: -I${includedir}/python@VERSION@@ABIFLAGS@ From b3d25df8d38b79310587da54dbd88b06a16d4904 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 21 Mar 2024 18:20:20 -0600 Subject: [PATCH 14/39] gh-105716: Fix _PyInterpreterState_IsRunningMain() For Embedders (gh-117140) When I added _PyInterpreterState_IsRunningMain() and friends last year, I tried to accommodate applications that embed Python but don't call _PyInterpreterState_SetRunningMain() (not that they're expected to). That mostly worked fine until my recent changes in gh-117049, where the subtleties with the fallback code led to failures; the change ended up breaking test_tools.test_freeze, which exercises a basic embedding situation. The simplest fix is to drop the fallback code I originally added to _PyInterpreterState_IsRunningMain() (and later to _PyThreadState_IsRunningMain()). I've kept the fallback in the _xxsubinterpreters module though. I've also updated Py_FrozenMain() to call _PyInterpreterState_SetRunningMain(). 
--- Modules/_xxsubinterpretersmodule.c | 22 ++++++++++++++++++++-- Python/frozenmain.c | 9 +++++++++ Python/pystate.c | 30 ++++++++++-------------------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/Modules/_xxsubinterpretersmodule.c b/Modules/_xxsubinterpretersmodule.c index befa225c9183c5..5e5b3c10201867 100644 --- a/Modules/_xxsubinterpretersmodule.c +++ b/Modules/_xxsubinterpretersmodule.c @@ -56,6 +56,24 @@ _get_current_module(void) } +static int +is_running_main(PyInterpreterState *interp) +{ + if (_PyInterpreterState_IsRunningMain(interp)) { + return 1; + } + // Unlike with the general C-API, we can be confident that someone + // using this module for the main interpreter is doing so through + // the main program. Thus we can make this extra check. This benefits + // applications that embed Python but haven't been updated yet + // to call _PyInterpreterState_SetRunningMain(). + if (_Py_IsMainInterpreter(interp)) { + return 1; + } + return 0; +} + + /* Cross-interpreter Buffer Views *******************************************/ // XXX Release when the original interpreter is destroyed. @@ -509,7 +527,7 @@ interp_destroy(PyObject *self, PyObject *args, PyObject *kwds) // Ensure the interpreter isn't running. /* XXX We *could* support destroying a running interpreter but aren't going to worry about it for now.
*/ - if (_PyInterpreterState_IsRunningMain(interp)) { + if (is_running_main(interp)) { PyErr_Format(PyExc_RuntimeError, "interpreter running"); return NULL; } @@ -977,7 +995,7 @@ interp_is_running(PyObject *self, PyObject *args, PyObject *kwds) if (interp == NULL) { return NULL; } - if (_PyInterpreterState_IsRunningMain(interp)) { + if (is_running_main(interp)) { Py_RETURN_TRUE; } Py_RETURN_FALSE; diff --git a/Python/frozenmain.c b/Python/frozenmain.c index 3ce9476c9ad46c..ec4566bd4f84bc 100644 --- a/Python/frozenmain.c +++ b/Python/frozenmain.c @@ -54,6 +54,12 @@ Py_FrozenMain(int argc, char **argv) Py_ExitStatusException(status); } + PyInterpreterState *interp = PyInterpreterState_Get(); + if (_PyInterpreterState_SetRunningMain(interp) < 0) { + PyErr_Print(); + exit(1); + } + #ifdef MS_WINDOWS PyWinFreeze_ExeInit(); #endif @@ -83,6 +89,9 @@ Py_FrozenMain(int argc, char **argv) #ifdef MS_WINDOWS PyWinFreeze_ExeTerm(); #endif + + _PyInterpreterState_SetNotRunningMain(interp); + if (Py_FinalizeEx() < 0) { sts = 120; } diff --git a/Python/pystate.c b/Python/pystate.c index 47d327ae28933b..921e74ed5a9826 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1042,24 +1042,15 @@ _PyInterpreterState_IsRunningMain(PyInterpreterState *interp) if (interp->threads.main != NULL) { return 1; } - // For now, we assume the main interpreter is always running. - if (_Py_IsMainInterpreter(interp)) { - return 1; - } + // Embedders might not know to call _PyInterpreterState_SetRunningMain(), + // so their main thread wouldn't show it is running the main interpreter's + // program. (Py_Main() doesn't have this problem.) For now this isn't + // critical. If it were, we would need to infer "running main" from other + // information, like if it's the main interpreter. We used to do that + // but the naive approach led to some inconsistencies that caused problems. 
return 0; } -#ifndef NDEBUG -static int -is_running_main(PyThreadState *tstate) -{ - if (tstate->interp->threads.main != NULL) { - return tstate == tstate->interp->threads.main; - } - return 0; -} -#endif - int _PyThreadState_IsRunningMain(PyThreadState *tstate) { @@ -1067,9 +1058,8 @@ _PyThreadState_IsRunningMain(PyThreadState *tstate) if (interp->threads.main != NULL) { return tstate == interp->threads.main; } - if (_Py_IsMainInterpreter(interp)) { - return tstate->thread_id == interp->runtime->main_thread; - } + // See the note in _PyInterpreterState_IsRunningMain() about + // possible false negatives here for embedders. return 0; } @@ -1571,7 +1561,7 @@ PyThreadState_Clear(PyThreadState *tstate) { assert(tstate->_status.initialized && !tstate->_status.cleared); assert(current_fast_get()->interp == tstate->interp); - assert(!is_running_main(tstate)); + assert(!_PyThreadState_IsRunningMain(tstate)); // XXX assert(!tstate->_status.bound || tstate->_status.unbound); tstate->_status.finalizing = 1; // just in case @@ -1670,7 +1660,7 @@ tstate_delete_common(PyThreadState *tstate) assert(tstate->_status.cleared && !tstate->_status.finalized); assert(tstate->state != _Py_THREAD_ATTACHED); tstate_verify_not_active(tstate); - assert(!is_running_main(tstate)); + assert(!_PyThreadState_IsRunningMain(tstate)); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { From 97ba910e47ad298114800587979ce7beb0a705a3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 21 Mar 2024 18:27:48 -0700 Subject: [PATCH 15/39] gh-108716:: Remove _PyStaticCode_Init/Fini (#117141) More deepfreeze cleanup. 
--- Objects/codeobject.c | 46 -------------------------------------------- 1 file changed, 46 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index bdde12d77caf07..f14ff73394b168 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2349,49 +2349,3 @@ _PyCode_ConstantKey(PyObject *op) } return key; } - -void -_PyStaticCode_Fini(PyCodeObject *co) -{ - if (co->co_executors != NULL) { - clear_executors(co); - } - deopt_code(co, _PyCode_CODE(co)); - PyMem_Free(co->co_extra); - if (co->_co_cached != NULL) { - Py_CLEAR(co->_co_cached->_co_code); - Py_CLEAR(co->_co_cached->_co_cellvars); - Py_CLEAR(co->_co_cached->_co_freevars); - Py_CLEAR(co->_co_cached->_co_varnames); - PyMem_Free(co->_co_cached); - co->_co_cached = NULL; - } - co->co_extra = NULL; - if (co->co_weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *)co); - co->co_weakreflist = NULL; - } - free_monitoring_data(co->_co_monitoring); - co->_co_monitoring = NULL; -} - -int -_PyStaticCode_Init(PyCodeObject *co) -{ - int res = intern_strings(co->co_names); - if (res < 0) { - return -1; - } - res = intern_string_constants(co->co_consts, NULL); - if (res < 0) { - return -1; - } - res = intern_strings(co->co_localsplusnames); - if (res < 0) { - return -1; - } - _PyCode_Quicken(co); - return 0; -} - -#define MAX_CODE_UNITS_PER_LOC_ENTRY 8 From 8383915031942f441f435a5ae800790116047b80 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 21 Mar 2024 22:27:25 -0500 Subject: [PATCH 16/39] GH-116939: Rewrite binarysort() (#116940) Rewrote binarysort() for clarity. Also changed the signature to be more coherent (it was mixing sortslice with raw pointers). No change in method or functionality. However, I left some experiments in, disabled for now via `#if` tricks. Since this code was first written, some kinds of comparisons have gotten enormously faster (like for lists of floats), which changes the tradeoffs. 
For example, plain insertion sort's simpler innermost loop and highly predictable branches leave it very competitive with (even beating, by a bit) binary insertion when comparisons are very cheap, despite that it can do many more compares. And it wins big on runs that are already sorted (moving the next one in takes only 1 compare then). So I left code for a plain insertion sort, to make future experimenting easier. Also made the maximum value of minrun a `#define` (`MAX_MINRUN`) to make experimenting with that easier too. And another bit of `#if`-disabled code rewrites binary insertion's innermost loop to remove its unpredictable branch. Surprisingly, this doesn't really seem to help overall. I'm unclear on why not. It certainly adds more instructions, but they're very simple, and it's hard to believe they cost as much as a branch miss. --- Objects/listobject.c | 188 ++++++++++++++++++++++++++++++------------- Objects/listsort.txt | 19 ++--- 2 files changed, 141 insertions(+), 66 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index fc20a9bff3af47..470ad8eb8135db 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1628,6 +1628,15 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) /* Avoid malloc for small temp arrays. */ #define MERGESTATE_TEMP_SIZE 256 +/* The largest value of minrun. This must be a power of 2, and >= 1, so that + * the compute_minrun() algorithm guarantees to return a result no larger than + * this. + */ +#define MAX_MINRUN 64 +#if ((MAX_MINRUN) < 1) || ((MAX_MINRUN) & ((MAX_MINRUN) - 1)) +#error "MAX_MINRUN must be a power of 2, and >= 1" +#endif + /* One MergeState exists on the stack per invocation of mergesort. It's just * a convenient way to pass state around among the helper functions.
*/ @@ -1685,68 +1694,133 @@ struct s_MergeState { int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); }; -/* binarysort is the best method for sorting small arrays: it does - few compares, but can do data movement quadratic in the number of - elements. - [lo.keys, hi) is a contiguous slice of a list of keys, and is sorted via - binary insertion. This sort is stable. - On entry, must have lo.keys <= start <= hi, and that - [lo.keys, start) is already sorted (pass start == lo.keys if you don't - know!). - If islt() complains return -1, else 0. +/* binarysort is the best method for sorting small arrays: it does few + compares, but can do data movement quadratic in the number of elements. + ss->keys is viewed as an array of n keys, a[:n]. a[:ok] is already sorted. + Pass ok = 0 (or 1) if you don't know. + It's sorted in-place, by a stable binary insertion sort. If ss->values + isn't NULL, it's permuted in lockstep with ss->keys. + On entry, must have n >= 1, and 0 <= ok <= n <= MAX_MINRUN. + Return -1 if comparison raises an exception, else 0. Even in case of error, the output slice will be some permutation of the input (nothing is lost or duplicated). */ static int -binarysort(MergeState *ms, sortslice lo, PyObject **hi, PyObject **start) +binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok) { - Py_ssize_t k; - PyObject **l, **p, **r; + Py_ssize_t k; /* for IFLT macro expansion */ + PyObject ** const a = ss->keys; + PyObject ** const v = ss->values; + const bool has_values = v != NULL; PyObject *pivot; - - assert(lo.keys <= start && start <= hi); - /* assert [lo.keys, start) is sorted */ - if (lo.keys == start) - ++start; - for (; start < hi; ++start) { - /* set l to where *start belongs */ - l = lo.keys; - r = start; - pivot = *r; - /* Invariants: - * pivot >= all in [lo.keys, l). - * pivot < all in [r, start). - * These are vacuously true at the start.
+ Py_ssize_t M; + + assert(0 <= ok && ok <= n && 1 <= n && n <= MAX_MINRUN); + /* assert a[:ok] is sorted */ + if (! ok) + ++ok; + /* Regular insertion sort has average- and worst-case O(n**2) cost + for both # of comparisons and number of bytes moved. But its branches + are highly predictable, and it loves sorted input (n-1 compares and no + data movement). This is significant in cases like sortperf.py's %sort, + where an out-of-order element near the start of a run is moved into + place slowly but then the remaining elements up to length minrun are + generally at worst one slot away from their correct position (so only + need 1 or 2 compares to resolve). If comparisons are very fast (such + as for a list of Python floats), the simple inner loop leaves it + very competitive with binary insertion, despite that it does + significantly more compares overall on random data. + + Binary insertion sort has worst, average, and best case O(n log n) + cost for # of comparisons, but worst and average case O(n**2) cost + for data movement. The more expensive comparisons, the more important + the comparison advantage. But its branches are less predictable the + more "randomish" the data, and that's so significant its worst case + in real life is random input rather than reverse-ordered (which does + about twice the data movement that random input does). + + Note that the number of bytes moved doesn't seem to matter. MAX_MINRUN + of 64 is so small that the key and value pointers all fit in a corner + of L1 cache, and moving things around in that is very fast. */ +#if 0 // ordinary insertion sort.
+ PyObject * vpivot = NULL; + for (; ok < n; ++ok) { + pivot = a[ok]; + if (has_values) + vpivot = v[ok]; + for (M = ok - 1; M >= 0; --M) { + k = ISLT(pivot, a[M]); + if (k < 0) { + a[M + 1] = pivot; + if (has_values) + v[M + 1] = vpivot; + goto fail; + } + else if (k) { + a[M + 1] = a[M]; + if (has_values) + v[M + 1] = v[M]; + } + else + break; + } + a[M + 1] = pivot; + if (has_values) + v[M + 1] = vpivot; + } +#else // binary insertion sort + Py_ssize_t L, R; + for (; ok < n; ++ok) { + /* set L to where a[ok] belongs */ + L = 0; + R = ok; + pivot = a[ok]; + /* Slice invariants. vacuously true at the start: + * all a[0:L] <= pivot + * all a[L:R] unknown + * all a[R:ok] > pivot */ - assert(l < r); + assert(L < R); do { - p = l + ((r - l) >> 1); - IFLT(pivot, *p) - r = p; + /* don't do silly ;-) things to prevent overflow when finding + the midpoint; L and R are very far from filling a Py_ssize_t */ + M = (L + R) >> 1; +#if 1 // straightforward, but highly unpredictable branch on random data + IFLT(pivot, a[M]) + R = M; else - l = p+1; - } while (l < r); - assert(l == r); - /* The invariants still hold, so pivot >= all in [lo.keys, l) and - pivot < all in [l, start), so pivot belongs at l. Note - that if there are elements equal to pivot, l points to the - first slot after them -- that's why this sort is stable. - Slide over to make room. - Caution: using memmove is much slower under MSVC 5; - we're not usually moving many slots. */ - for (p = start; p > l; --p) - *p = *(p-1); - *l = pivot; - if (lo.values != NULL) { - Py_ssize_t offset = lo.values - lo.keys; - p = start + offset; - pivot = *p; - l += offset; - for ( ; p > l; --p) - *p = *(p-1); - *l = pivot; + L = M + 1; +#else + /* Try to get compiler to generate conditional move instructions + instead. Works fine, but leaving it disabled for now because + it's not yielding consistently faster sorts. Needs more + investigation. 
More computation in the inner loop adds its own + costs, which can be significant when compares are fast. */ + k = ISLT(pivot, a[M]); + if (k < 0) + goto fail; + Py_ssize_t Mp1 = M + 1; + R = k ? M : R; + L = k ? L : Mp1; +#endif + } while (L < R); + assert(L == R); + /* a[:L] holds all elements from a[:ok] <= pivot now, so pivot belongs + at index L. Slide a[L:ok] to the right a slot to make room for it. + Caution: using memmove is much slower under MSVC 5; we're not + usually moving many slots. Years later: under Visual Studio 2022, + memmove seems just slightly slower than doing it "by hand". */ + for (M = ok; M > L; --M) + a[M] = a[M - 1]; + a[L] = pivot; + if (has_values) { + pivot = v[ok]; + for (M = ok; M > L; --M) + v[M] = v[M - 1]; + v[L] = pivot; } } +#endif // pick binary or regular insertion sort return 0; fail: @@ -2559,10 +2633,10 @@ merge_force_collapse(MergeState *ms) /* Compute a good value for the minimum run length; natural runs shorter * than this are boosted artificially via binary insertion. * - * If n < 64, return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return 32. - * Else return an int k, 32 <= k <= 64, such that n/k is close to, but - * strictly less than, an exact power of 2. + * If n < MAX_MINRUN return n (it's too small to bother with fancy stuff). + * Else if n is an exact power of 2, return MAX_MINRUN / 2. + * Else return an int k, MAX_MINRUN / 2 <= k <= MAX_MINRUN, such that n/k is + * close to, but strictly less than, an exact power of 2. * * See listsort.txt for more info. */ @@ -2572,7 +2646,7 @@ merge_compute_minrun(Py_ssize_t n) Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ assert(n >= 0); - while (n >= 64) { + while (n >= MAX_MINRUN) { r |= n & 1; n >>= 1; } @@ -2956,7 +3030,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? 
nremaining : minrun; - if (binarysort(&ms, lo, lo.keys + force, lo.keys + n) < 0) + if (binarysort(&ms, &lo, force, n) < 0) goto fail; n = force; } diff --git a/Objects/listsort.txt b/Objects/listsort.txt index 4f84e2c87da7f1..f387d9c116e502 100644 --- a/Objects/listsort.txt +++ b/Objects/listsort.txt @@ -270,9 +270,9 @@ result. This has two primary good effects: Computing minrun ---------------- -If N < 64, minrun is N. IOW, binary insertion sort is used for the whole -array then; it's hard to beat that given the overheads of trying something -fancier (see note BINSORT). +If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the +whole array then; it's hard to beat that given the overheads of trying +something fancier (see note BINSORT). When N is a power of 2, testing on random data showed that minrun values of 16, 32, 64 and 128 worked about equally well. At 256 the data-movement cost @@ -310,12 +310,13 @@ place, and r < minrun is small compared to N), or q a little larger than a power of 2 regardless of r (then we've got a case similar to "2112", again leaving too little work for the last merge to do). -Instead we pick a minrun in range(32, 65) such that N/minrun is exactly a -power of 2, or if that isn't possible, is close to, but strictly less than, -a power of 2. This is easier to do than it may sound: take the first 6 -bits of N, and add 1 if any of the remaining bits are set. In fact, that -rule covers every case in this section, including small N and exact powers -of 2; merge_compute_minrun() is a deceptively simple function. +Instead we pick a minrun in range(MAX_MINRUN / 2, MAX_MINRUN + 1) such that +N/minrun is exactly a power of 2, or if that isn't possible, is close to, but +strictly less than, a power of 2. This is easier to do than it may sound: +take the first log2(MAX_MINRUN) bits of N, and add 1 if any of the remaining +bits are set. 
In fact, that rule covers every case in this section, including +small N and exact powers of 2; merge_compute_minrun() is a deceptively simple +function. The Merge Pattern From 42ae924d278c48a719fb0ab86357f3235a9f7ab9 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 22 Mar 2024 10:42:18 +0100 Subject: [PATCH 17/39] gh-117127: glob tests: Reopen dir_fd to pick up directory changes (GH-117128) --- Lib/test/test_glob.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 2de997501039ad..6719bdbb0cc9b1 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -41,6 +41,11 @@ def setUp(self): os.symlink(self.norm('broken'), self.norm('sym1')) os.symlink('broken', self.norm('sym2')) os.symlink(os.path.join('a', 'bcd'), self.norm('sym3')) + self.open_dirfd() + + def open_dirfd(self): + if self.dir_fd is not None: + os.close(self.dir_fd) if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd: self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY) else: @@ -350,6 +355,10 @@ def test_glob_non_directory(self): def test_glob_named_pipe(self): path = os.path.join(self.tempdir, 'mypipe') os.mkfifo(path) + + # gh-117127: Reopen self.dir_fd to pick up directory changes + self.open_dirfd() + self.assertEqual(self.rglob('mypipe'), [path]) self.assertEqual(self.rglob('mypipe*'), [path]) self.assertEqual(self.rglob('mypipe', ''), []) From 63d6f2623ef2aa90f51c6a928b96845b9b380d89 Mon Sep 17 00:00:00 2001 From: NGRsoftlab <78017794+NGRsoftlab@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:25:38 +0300 Subject: [PATCH 18/39] gh-117068: Remove useless code in bytesio.c:resize_buffer() (GH-117069) Co-authored-by: i.khabibulin --- Modules/_io/bytesio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index 4a15c8e841f25f..fb66d3db0f7a1f 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -155,9 +155,6 @@ 
resize_buffer(bytesio *self, size_t size) alloc = size + 1; } - if (alloc > ((size_t)-1) / sizeof(char)) - goto overflow; - if (SHARED_BUF(self)) { if (unshare_buffer(self, alloc) < 0) return -1; From 3be9b9d8722696b95555937bb211dc4cda714d56 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 22 Mar 2024 15:00:50 +0000 Subject: [PATCH 19/39] Fix get_packagefamilyname helper function on Windows 32-bit (GH-117153) --- PC/layout/support/appxmanifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PC/layout/support/appxmanifest.py b/PC/layout/support/appxmanifest.py index 1fb03380278f43..53977beb8af834 100644 --- a/PC/layout/support/appxmanifest.py +++ b/PC/layout/support/appxmanifest.py @@ -209,7 +209,7 @@ class PACKAGE_ID(ctypes.Structure): result = ctypes.create_unicode_buffer(256) result_len = ctypes.c_uint32(256) r = ctypes.windll.kernel32.PackageFamilyNameFromId( - pid, ctypes.byref(result_len), result + ctypes.byref(pid), ctypes.byref(result_len), result ) if r: raise OSError(r, "failed to get package family name") From 40d75c2b7f5c67e254d0a025e0f2e2c7ada7f69f Mon Sep 17 00:00:00 2001 From: Jakub Stasiak Date: Fri, 22 Mar 2024 17:49:56 +0100 Subject: [PATCH 20/39] GH-113171: Fix "private" (non-global) IP address ranges (GH-113179) * GH-113171: Fix "private" (really non-global) IP address ranges The _private_networks variables, used by various is_private implementations, were missing some ranges and at the same time had overly strict ranges (where there are more specific ranges considered globally reachable by the IANA registries). This patch updates the ranges with what was missing or otherwise incorrect. I left 100.64.0.0/10 alone, for now, as it's been made special in [1] and I'm not sure if we want to undo that as I don't quite understand the motivation behind it. The _address_exclude_many() call returns 8 networks for IPv4, 121 networks for IPv6. 
[1] https://github.com/python/cpython/issues/61602 --- Doc/library/ipaddress.rst | 16 ++++++++ Doc/whatsnew/3.13.rst | 2 + Lib/ipaddress.py | 41 ++++++++++++++++--- Lib/test/test_ipaddress.py | 21 +++++++++- ...-03-14-01-38-44.gh-issue-113171.VFnObz.rst | 9 ++++ 5 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst index 73f4960082617b..8f090b5eec5980 100644 --- a/Doc/library/ipaddress.rst +++ b/Doc/library/ipaddress.rst @@ -192,6 +192,18 @@ write code that handles both IP versions correctly. Address objects are ``is_private`` has value opposite to :attr:`is_global`, except for the shared address space (``100.64.0.0/10`` range) where they are both ``False``. + .. versionchanged:: 3.13 + + Fixed some false positives and false negatives. + + * ``192.0.0.0/24`` is considered private with the exception of ``192.0.0.9/32`` and + ``192.0.0.10/32`` (previously: only the ``192.0.0.0/29`` sub-range was considered private). + * ``64:ff9b:1::/48`` is considered private. + * ``2002::/16`` is considered private. + * There are exceptions within ``2001::/23`` (otherwise considered private): ``2001:1::1/128``, + ``2001:1::2/128``, ``2001:3::/32``, ``2001:4:112::/48``, ``2001:20::/28``, ``2001:30::/28``. + The exceptions are not considered private. + .. attribute:: is_global ``True`` if the address is defined as globally reachable by @@ -209,6 +221,10 @@ write code that handles both IP versions correctly. Address objects are .. versionadded:: 3.4 + .. versionchanged:: 3.13 + + Fixed some false positives and false negatives, see :attr:`is_private` for details. + .. attribute:: is_unspecified ``True`` if the address is unspecified. 
See :RFC:`5735` (for IPv4) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 7e6c79dbf50aac..bec788e7ed2b0e 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -401,6 +401,8 @@ ipaddress * Add the :attr:`ipaddress.IPv4Address.ipv6_mapped` property, which returns the IPv4-mapped IPv6 address. (Contributed by Charles Machalow in :gh:`109466`.) +* Fix ``is_global`` and ``is_private`` behavior in ``IPv4Address``, ``IPv6Address``, ``IPv4Network`` + and ``IPv6Network``. itertools --------- diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index 7d6edcf2478a82..22cdfc93d8ad32 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -1086,7 +1086,11 @@ def is_private(self): """ return any(self.network_address in priv_network and self.broadcast_address in priv_network - for priv_network in self._constants._private_networks) + for priv_network in self._constants._private_networks) and all( + self.network_address not in network and + self.broadcast_address not in network + for network in self._constants._private_networks_exceptions + ) @property def is_global(self): @@ -1347,7 +1351,10 @@ def is_private(self): ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` IPv4 range where they are both ``False``. 
""" - return any(self in net for net in self._constants._private_networks) + return ( + any(self in net for net in self._constants._private_networks) + and all(self not in net for net in self._constants._private_networks_exceptions) + ) @property @functools.lru_cache() @@ -1578,13 +1585,15 @@ class _IPv4Constants: _public_network = IPv4Network('100.64.0.0/10') + # Not globally reachable address blocks listed on + # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml _private_networks = [ IPv4Network('0.0.0.0/8'), IPv4Network('10.0.0.0/8'), IPv4Network('127.0.0.0/8'), IPv4Network('169.254.0.0/16'), IPv4Network('172.16.0.0/12'), - IPv4Network('192.0.0.0/29'), + IPv4Network('192.0.0.0/24'), IPv4Network('192.0.0.170/31'), IPv4Network('192.0.2.0/24'), IPv4Network('192.168.0.0/16'), @@ -1595,6 +1604,11 @@ class _IPv4Constants: IPv4Network('255.255.255.255/32'), ] + _private_networks_exceptions = [ + IPv4Network('192.0.0.9/32'), + IPv4Network('192.0.0.10/32'), + ] + _reserved_network = IPv4Network('240.0.0.0/4') _unspecified_address = IPv4Address('0.0.0.0') @@ -2086,7 +2100,10 @@ def is_private(self): ipv4_mapped = self.ipv4_mapped if ipv4_mapped is not None: return ipv4_mapped.is_private - return any(self in net for net in self._constants._private_networks) + return ( + any(self in net for net in self._constants._private_networks) + and all(self not in net for net in self._constants._private_networks_exceptions) + ) @property def is_global(self): @@ -2342,19 +2359,31 @@ class _IPv6Constants: _multicast_network = IPv6Network('ff00::/8') + # Not globally reachable address blocks listed on + # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml _private_networks = [ IPv6Network('::1/128'), IPv6Network('::/128'), IPv6Network('::ffff:0:0/96'), + IPv6Network('64:ff9b:1::/48'), IPv6Network('100::/64'), IPv6Network('2001::/23'), - IPv6Network('2001:2::/48'), IPv6Network('2001:db8::/32'), - 
IPv6Network('2001:10::/28'), + # IANA says N/A, let's consider it not globally reachable to be safe + IPv6Network('2002::/16'), IPv6Network('fc00::/7'), IPv6Network('fe80::/10'), ] + _private_networks_exceptions = [ + IPv6Network('2001:1::1/128'), + IPv6Network('2001:1::2/128'), + IPv6Network('2001:3::/32'), + IPv6Network('2001:4:112::/48'), + IPv6Network('2001:20::/28'), + IPv6Network('2001:30::/28'), + ] + _reserved_networks = [ IPv6Network('::/8'), IPv6Network('100::/8'), IPv6Network('200::/7'), IPv6Network('400::/6'), diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py index b4952acc2b61b1..f1519df673747a 100644 --- a/Lib/test/test_ipaddress.py +++ b/Lib/test/test_ipaddress.py @@ -2288,6 +2288,10 @@ def testReservedIpv4(self): self.assertEqual(True, ipaddress.ip_address( '172.31.255.255').is_private) self.assertEqual(False, ipaddress.ip_address('172.32.0.0').is_private) + self.assertFalse(ipaddress.ip_address('192.0.0.0').is_global) + self.assertTrue(ipaddress.ip_address('192.0.0.9').is_global) + self.assertTrue(ipaddress.ip_address('192.0.0.10').is_global) + self.assertFalse(ipaddress.ip_address('192.0.0.255').is_global) self.assertEqual(True, ipaddress.ip_address('169.254.100.200').is_link_local) @@ -2313,6 +2317,7 @@ def testPrivateNetworks(self): self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private) self.assertEqual(True, ipaddress.ip_network("172.16.0.0/12").is_private) self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private) + self.assertEqual(False, ipaddress.ip_network("192.0.0.9/32").is_private) self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private) self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private) self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private) @@ -2329,8 +2334,8 @@ def testPrivateNetworks(self): self.assertEqual(True, ipaddress.ip_network("::/128").is_private) self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private) 
self.assertEqual(True, ipaddress.ip_network("100::/64").is_private) - self.assertEqual(True, ipaddress.ip_network("2001::/23").is_private) self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private) + self.assertEqual(False, ipaddress.ip_network("2001:3::/48").is_private) self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private) self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private) self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private) @@ -2409,6 +2414,20 @@ def testReservedIpv6(self): self.assertEqual(True, ipaddress.ip_address('0::0').is_unspecified) self.assertEqual(False, ipaddress.ip_address('::1').is_unspecified) + self.assertFalse(ipaddress.ip_address('64:ff9b:1::').is_global) + self.assertFalse(ipaddress.ip_address('2001::').is_global) + self.assertTrue(ipaddress.ip_address('2001:1::1').is_global) + self.assertTrue(ipaddress.ip_address('2001:1::2').is_global) + self.assertFalse(ipaddress.ip_address('2001:2::').is_global) + self.assertTrue(ipaddress.ip_address('2001:3::').is_global) + self.assertFalse(ipaddress.ip_address('2001:4::').is_global) + self.assertTrue(ipaddress.ip_address('2001:4:112::').is_global) + self.assertFalse(ipaddress.ip_address('2001:10::').is_global) + self.assertTrue(ipaddress.ip_address('2001:20::').is_global) + self.assertTrue(ipaddress.ip_address('2001:30::').is_global) + self.assertFalse(ipaddress.ip_address('2001:40::').is_global) + self.assertFalse(ipaddress.ip_address('2002::').is_global) + # some generic IETF reserved addresses self.assertEqual(True, ipaddress.ip_address('100::').is_reserved) self.assertEqual(True, ipaddress.ip_network('4000::1/128').is_reserved) diff --git a/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst b/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst new file mode 100644 index 00000000000000..f9a72473be4e2c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst @@ 
-0,0 +1,9 @@ +Fixed various false positives and false negatives in + +* :attr:`ipaddress.IPv4Address.is_private` (see these docs for details) +* :attr:`ipaddress.IPv4Address.is_global` +* :attr:`ipaddress.IPv6Address.is_private` +* :attr:`ipaddress.IPv6Address.is_global` + +Also in the corresponding :class:`ipaddress.IPv4Network` and :class:`ipaddress.IPv6Network` +attributes. From 00baaa21de229a6db80ff2b84c2fd6ad1999a24c Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Fri, 22 Mar 2024 17:25:51 +0000 Subject: [PATCH 21/39] [docs] Fix typo in docstring and add example to logging cookbook. (GH-117157) --- Doc/howto/logging-cookbook.rst | 140 +++++++++++++++++++++++++++++---- Lib/logging/__init__.py | 2 +- 2 files changed, 127 insertions(+), 15 deletions(-) diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index ad3e34d0b33bd2..d8ebeabcd522b1 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -1846,8 +1846,11 @@ the use of a :class:`Filter` does not provide the desired result. .. _zeromq-handlers: -Subclassing QueueHandler - a ZeroMQ example -------------------------------------------- +Subclassing QueueHandler and QueueListener- a ZeroMQ example +------------------------------------------------------------ + +Subclass ``QueueHandler`` +^^^^^^^^^^^^^^^^^^^^^^^^^ You can use a :class:`QueueHandler` subclass to send messages to other kinds of queues, for example a ZeroMQ 'publish' socket. In the example below,the @@ -1885,8 +1888,8 @@ data needed by the handler to create the socket:: self.queue.close() -Subclassing QueueListener - a ZeroMQ example --------------------------------------------- +Subclass ``QueueListener`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ You can also subclass :class:`QueueListener` to get messages from other kinds of queues, for example a ZeroMQ 'subscribe' socket. Here's an example:: @@ -1903,25 +1906,134 @@ of queues, for example a ZeroMQ 'subscribe' socket. 
Here's an example:: msg = self.queue.recv_json() return logging.makeLogRecord(msg) +.. _pynng-handlers: -.. seealso:: +Subclassing QueueHandler and QueueListener- a ``pynng`` example +--------------------------------------------------------------- - Module :mod:`logging` - API reference for the logging module. +In a similar way to the above section, we can implement a listener and handler +using `pynng <https://pypi.org/project/pynng/>`_, which is a Python binding to +`NNG <https://nng.nanomsg.org/>`_, billed as a spiritual successor to ZeroMQ. +The following snippets illustrate -- you can test them in an environment which has +``pynng`` installed. Just for variety, we present the listener first. - Module :mod:`logging.config` - Configuration API for the logging module. - Module :mod:`logging.handlers` - Useful handlers included with the logging module. +Subclass ``QueueListener`` +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: python + + import json + import logging + import logging.handlers + + import pynng - :ref:`A basic logging tutorial <logging-basic-tutorial>` + DEFAULT_ADDR = "tcp://localhost:13232" - :ref:`A more advanced logging tutorial <logging-advanced-tutorial>` + interrupted = False + class NNGSocketListener(logging.handlers.QueueListener): + + def __init__(self, uri, /, *handlers, **kwargs): + # Have a timeout for interruptability, and open a + # subscriber socket + socket = pynng.Sub0(listen=uri, recv_timeout=500) + # The b'' subscription matches all topics + topics = kwargs.pop('topics', None) or b'' + socket.subscribe(topics) + # We treat the socket as a queue + super().__init__(socket, *handlers, **kwargs) + + def dequeue(self, block): + data = None + # Keep looping while not interrupted and no data received over the + # socket + while not interrupted: + try: + data = self.queue.recv(block=block) + break + except pynng.Timeout: + pass + except pynng.Closed: # sometimes hit when you hit Ctrl-C + break + if data is None: + return None + # Get the logging event sent from a publisher + event = json.loads(data.decode('utf-8')) + return
logging.makeLogRecord(event) + + def enqueue_sentinel(self): + # Not used in this implementation, as the socket isn't really a + # queue + pass + + logging.getLogger('pynng').propagate = False + listener = NNGSocketListener(DEFAULT_ADDR, logging.StreamHandler(), topics=b'') + listener.start() + print('Press Ctrl-C to stop.') + try: + while True: + pass + except KeyboardInterrupt: + interrupted = True + finally: + listener.stop() + + +Subclass ``QueueHandler`` +^^^^^^^^^^^^^^^^^^^^^^^^^ .. currentmodule:: logging +.. code-block:: python + + import json + import logging + import logging.handlers + import time + import random + + import pynng + + DEFAULT_ADDR = "tcp://localhost:13232" + + class NNGSocketHandler(logging.handlers.QueueHandler): + + def __init__(self, uri): + socket = pynng.Pub0(dial=uri, send_timeout=500) + super().__init__(socket) + + def enqueue(self, record): + # Send the record as UTF-8 encoded JSON + d = dict(record.__dict__) + data = json.dumps(d) + self.queue.send(data.encode('utf-8')) + + def close(self): + self.queue.close() + + logging.getLogger('pynng').propagate = False + handler = NNGSocketHandler(DEFAULT_ADDR) + logging.basicConfig(level=logging.DEBUG, + handlers=[logging.StreamHandler(), handler], + format='%(levelname)-8s %(name)10s %(message)s') + levels = (logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, + logging.CRITICAL) + logger_names = ('myapp', 'myapp.lib1', 'myapp.lib2') + msgno = 1 + while True: + # Just randomly select some loggers and levels and log away + level = random.choice(levels) + logger = logging.getLogger(random.choice(logger_names)) + logger.log(level, 'Message no. %5d' % msgno) + msgno += 1 + delay = random.random() * 2 + 0.5 + time.sleep(delay) + +You can run the above two snippets in separate command shells. 
+ + An example dictionary-based configuration ----------------------------------------- @@ -3418,7 +3530,7 @@ The worker thread is implemented using Qt's ``QThread`` class rather than the :mod:`threading` module, as there are circumstances where one has to use ``QThread``, which offers better integration with other ``Qt`` components. -The code should work with recent releases of either ``PySide6``, ``PyQt6``, +The code should work with recent releases of any of ``PySide6``, ``PyQt6``, ``PySide2`` or ``PyQt5``. You should be able to adapt the approach to earlier versions of Qt. Please refer to the comments in the code snippet for more detailed information. diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index fcec9e76b98661..927e3e653f065a 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -2013,7 +2013,7 @@ def basicConfig(**kwargs): that this argument is incompatible with 'filename' - if both are present, 'stream' is ignored. handlers If specified, this should be an iterable of already created - handlers, which will be added to the root handler. Any handler + handlers, which will be added to the root logger. Any handler in the list which does not have a formatter assigned will be assigned the formatter created in this function. 
force If this keyword is specified as true, any existing handlers From 5a78f6e798d5c2af1dba2df6c9f1f1e5aac02a86 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Mar 2024 20:03:48 +0200 Subject: [PATCH 22/39] gh-117134: Microoptimize glob() for include_hidden=True (GH-117135) --- Lib/glob.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 473502c67336f9..d59641195a1c41 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -104,8 +104,8 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly, def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False): names = _listdir(dirname, dir_fd, dironly) - if include_hidden or not _ishidden(pattern): - names = (x for x in names if include_hidden or not _ishidden(x)) + if not (include_hidden or _ishidden(pattern)): + names = (x for x in names if not _ishidden(x)) return fnmatch.filter(names, pattern) def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False): From 567ab3bd15398c8c7b791f3e376ae3e3c0bbe079 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 22 Mar 2024 20:08:00 +0200 Subject: [PATCH 23/39] gh-117084: Fix ZIP file extraction for directory entry names with backslashes on Windows (GH-117129) --- Lib/test/archivetestdata/zipdir_backslash.zip | Bin 0 -> 192 bytes Lib/test/test_zipfile/test_core.py | 16 ++++++++++++++++ Lib/zipfile/__init__.py | 10 +++++++++- ...24-03-21-17-07-38.gh-issue-117084.w1mTpT.rst | 2 ++ 4 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 Lib/test/archivetestdata/zipdir_backslash.zip create mode 100644 Misc/NEWS.d/next/Library/2024-03-21-17-07-38.gh-issue-117084.w1mTpT.rst diff --git a/Lib/test/archivetestdata/zipdir_backslash.zip b/Lib/test/archivetestdata/zipdir_backslash.zip new file mode 100644 index 0000000000000000000000000000000000000000..979126ef5e37ebd46762c76439e9b4e77431103c GIT binary patch literal 192 zcmWIWW@Zs#0D Date: Fri, 22 Mar 2024 20:19:10 +0200 Subject: [PATCH 24/39] 
gh-113024: C API: Add PyObject_GenericHash() function (GH-113025) --- Doc/c-api/hash.rst | 11 +++++++++++ Doc/c-api/typeobj.rst | 4 ++++ Doc/whatsnew/3.13.rst | 4 ++++ Include/cpython/pyhash.h | 1 + Lib/test/test_capi/test_abstract.py | 6 ++++++ .../2023-12-12-19-48-31.gh-issue-113024.rXcQs7.rst | 1 + Modules/_decimal/_decimal.c | 2 +- Modules/_testcapi/hash.c | 11 +++++++++++ Objects/classobject.c | 2 +- Objects/descrobject.c | 2 +- Objects/methodobject.c | 2 +- Objects/typeobject.c | 8 +------- PC/winreg.c | 2 +- Python/pyhash.c | 8 +++++++- 14 files changed, 51 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-12-12-19-48-31.gh-issue-113024.rXcQs7.rst diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 1cf094cfcdca24..ddf0b3e15dbdbe 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -82,3 +82,14 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. The function cannot fail: it cannot return ``-1``. .. versionadded:: 3.13 + +.. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj) + + Generic hashing function that is meant to be put into a type + object's ``tp_hash`` slot. + Its result only depends on the object's identity. + + .. impl-detail:: + In CPython, it is equivalent to :c:func:`Py_HashPointer`. + + .. versionadded:: 3.13 diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 8a26f237652d12..e66ab01878cac0 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -883,6 +883,10 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:member:`~PyTypeObject.tp_richcompare` and :c:member:`~PyTypeObject.tp_hash`, when the subtype's :c:member:`~PyTypeObject.tp_richcompare` and :c:member:`~PyTypeObject.tp_hash` are both ``NULL``. + **Default:** + + :c:data:`PyBaseObject_Type` uses :c:func:`PyObject_GenericHash`. + .. 
c:member:: ternaryfunc PyTypeObject.tp_call diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index bec788e7ed2b0e..c9a93d58056747 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1702,6 +1702,10 @@ New Features * Add :c:func:`Py_HashPointer` function to hash a pointer. (Contributed by Victor Stinner in :gh:`111545`.) +* Add :c:func:`PyObject_GenericHash` function that implements the default + hashing function of a Python object. + (Contributed by Serhiy Storchaka in :gh:`113024`.) + * Add PyTime C API: * :c:type:`PyTime_t` type. diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index b476c3f357de92..2f8e12c1423aa1 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -43,3 +43,4 @@ typedef struct { PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr); +PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *); diff --git a/Lib/test/test_capi/test_abstract.py b/Lib/test/test_capi/test_abstract.py index 7e6cc9a2d0154b..bc39036e90bf8b 100644 --- a/Lib/test/test_capi/test_abstract.py +++ b/Lib/test/test_capi/test_abstract.py @@ -1001,6 +1001,12 @@ def test_number_check(self): self.assertTrue(number_check(0.5)) self.assertFalse(number_check("1 + 1j")) + def test_object_generichash(self): + # Test PyObject_GenericHash() + generichash = _testcapi.object_generichash + for obj in object(), 1, 'string', []: + self.assertEqual(generichash(obj), object.__hash__(obj)) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2023-12-12-19-48-31.gh-issue-113024.rXcQs7.rst b/Misc/NEWS.d/next/C API/2023-12-12-19-48-31.gh-issue-113024.rXcQs7.rst new file mode 100644 index 00000000000000..60ed6e64c3b6b8 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-12-12-19-48-31.gh-issue-113024.rXcQs7.rst @@ -0,0 +1 @@ +Add :c:func:`PyObject_GenericHash` function. 
diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 5b053c73e20bc9..2481455ac0d143 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -4780,7 +4780,7 @@ _dec_hash(PyDecObject *v) return -1; } else if (mpd_isnan(MPD(v))) { - return _Py_HashPointer(v); + return PyObject_GenericHash((PyObject *)v); } else { return py_hash_inf * mpd_arith_sign(MPD(v)); diff --git a/Modules/_testcapi/hash.c b/Modules/_testcapi/hash.c index aee76787dcddb3..809d537bfef0d3 100644 --- a/Modules/_testcapi/hash.c +++ b/Modules/_testcapi/hash.c @@ -59,9 +59,20 @@ hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg) } +static PyObject * +object_generichash(PyObject *Py_UNUSED(module), PyObject *arg) +{ + NULLABLE(arg); + Py_hash_t hash = PyObject_GenericHash(arg); + Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); + return PyLong_FromLongLong(hash); +} + + static PyMethodDef test_methods[] = { {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS}, {"hash_pointer", hash_pointer, METH_O}, + {"object_generichash", object_generichash, METH_O}, {NULL}, }; diff --git a/Objects/classobject.c b/Objects/classobject.c index d7e520f556d9a0..9cbb9442c6059c 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -301,7 +301,7 @@ static Py_hash_t method_hash(PyMethodObject *a) { Py_hash_t x, y; - x = _Py_HashPointer(a->im_self); + x = PyObject_GenericHash(a->im_self); y = PyObject_Hash(a->im_func); if (y == -1) return -1; diff --git a/Objects/descrobject.c b/Objects/descrobject.c index df546a090c28e4..3423f152ce862d 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -1346,7 +1346,7 @@ wrapper_hash(PyObject *self) { wrapperobject *wp = (wrapperobject *)self; Py_hash_t x, y; - x = _Py_HashPointer(wp->self); + x = PyObject_GenericHash(wp->self); y = _Py_HashPointer(wp->descr); x = x ^ y; if (x == -1) diff --git a/Objects/methodobject.c b/Objects/methodobject.c index 599fb05cb5874f..d6773a264101dc 100644 --- a/Objects/methodobject.c 
+++ b/Objects/methodobject.c @@ -320,7 +320,7 @@ static Py_hash_t meth_hash(PyCFunctionObject *a) { Py_hash_t x, y; - x = _Py_HashPointer(a->m_self); + x = PyObject_GenericHash(a->m_self); y = _Py_HashPointer((void*)(a->m_ml->ml_meth)); x ^= y; if (x == -1) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 06c2fc8e6ca072..82822784aaf407 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6891,12 +6891,6 @@ PyDoc_STRVAR(object_doc, "When called, it accepts no arguments and returns a new featureless\n" "instance that has no instance attributes and cannot be given any.\n"); -static Py_hash_t -object_hash(PyObject *obj) -{ - return _Py_HashPointer(obj); -} - PyTypeObject PyBaseObject_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "object", /* tp_name */ @@ -6911,7 +6905,7 @@ PyTypeObject PyBaseObject_Type = { 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ - object_hash, /* tp_hash */ + PyObject_GenericHash, /* tp_hash */ 0, /* tp_call */ object_str, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ diff --git a/PC/winreg.c b/PC/winreg.c index 77b80217ac0ab1..8096d17e43b7bc 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -200,7 +200,7 @@ PyHKEY_hashFunc(PyObject *ob) /* Just use the address. XXX - should we use the handle value? */ - return _Py_HashPointer(ob); + return PyObject_GenericHash(ob); } diff --git a/Python/pyhash.c b/Python/pyhash.c index 141407c265677a..d508d78092a9e7 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -94,7 +94,7 @@ _Py_HashDouble(PyObject *inst, double v) if (Py_IS_INFINITY(v)) return v > 0 ? 
_PyHASH_INF : -_PyHASH_INF; else - return _Py_HashPointer(inst); + return PyObject_GenericHash(inst); } m = frexp(v, &e); @@ -139,6 +139,12 @@ Py_HashPointer(const void *ptr) return hash; } +Py_hash_t +PyObject_GenericHash(PyObject *obj) +{ + return Py_HashPointer(obj); +} + Py_hash_t _Py_HashBytes(const void *src, Py_ssize_t len) { From e28477f214276db941e715eebc8cdfb96c1207d9 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Mar 2024 18:43:25 +0000 Subject: [PATCH 25/39] GH-117108: Change the size of the GC increment to about 1% of the total heap size. (GH-117120) --- Include/internal/pycore_gc.h | 3 +- Lib/test/test_gc.py | 35 +++++++++++++------ ...-03-21-12-10-11.gh-issue-117108._6jIrB.rst | 3 ++ Modules/gcmodule.c | 2 +- Python/gc.c | 30 ++++++++-------- Python/gc_free_threading.c | 2 +- 6 files changed, 47 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 9d66e62ba8b5e3..e729616936f03b 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -282,6 +282,7 @@ struct _gc_runtime_state { /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; + Py_ssize_t heap_size; Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; @@ -321,7 +322,7 @@ extern void _PyGC_Unfreeze(PyInterpreterState *interp); /* Number of frozen objects */ extern Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp); -extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation); +extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, int generation); extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs); // Functions to clear types free lists diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index ce01916bcabe4f..57acbac5859e7f 100644 --- a/Lib/test/test_gc.py +++ 
b/Lib/test/test_gc.py @@ -1058,7 +1058,19 @@ class Z: callback.assert_not_called() gc.enable() + +class IncrementalGCTests(unittest.TestCase): + + def setUp(self): + # Reenable GC as it is disabled module-wide + gc.enable() + + def tearDown(self): + gc.disable() + @unittest.skipIf(Py_GIL_DISABLED, "Free threading does not support incremental GC") + # Use small increments to emulate longer running process in a shorter time + @gc_threshold(200, 10) def test_incremental_gc_handles_fast_cycle_creation(self): class LinkedList: @@ -1080,28 +1092,31 @@ def make_ll(depth): head = LinkedList(head, head.prev) return head - head = make_ll(10000) - count = 10000 + head = make_ll(1000) + count = 1000 - # We expect the counts to go negative eventually - # as there will some objects we aren't counting, - # e.g. the gc stats dicts. The test merely checks - # that the counts don't grow. + # There will be some objects we aren't counting, + # e.g. the gc stats dicts. This test checks + # that the counts don't grow, so we try to + # correct for the uncounted objects + # This is just an estimate. 
+ CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] - for i in range(1000): - newhead = make_ll(200) - count += 200 + for i in range(20_000): + newhead = make_ll(20) + count += 20 newhead.surprise = head olds.append(newhead) - if len(olds) == 50: + if len(olds) == 20: stats = gc.get_stats() young = stats[0] incremental = stats[1] old = stats[2] collected = young['collected'] + incremental['collected'] + old['collected'] + count += CORRECTION live = count - collected self.assertLess(live, 25000) del olds[:] diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst new file mode 100644 index 00000000000000..57ad9606b05e05 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-03-21-12-10-11.gh-issue-117108._6jIrB.rst @@ -0,0 +1,3 @@ +The cycle GC now chooses the size of increments based on the total heap +size, instead of the rate of object creation. This ensures that it can keep +up with growing heaps. 
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 3320e54dd9fe93..8a1b483eddae35 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -326,7 +326,7 @@ gc_get_objects_impl(PyObject *module, Py_ssize_t generation) } PyInterpreterState *interp = _PyInterpreterState_GET(); - return _PyGC_GetObjects(interp, generation); + return _PyGC_GetObjects(interp, (int)generation); } /*[clinic input] diff --git a/Python/gc.c b/Python/gc.c index d0f4ce38bbe567..2517b86a41fa53 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -182,6 +182,7 @@ _PyGC_Init(PyInterpreterState *interp) if (gcstate->callbacks == NULL) { return _PyStatus_NO_MEMORY(); } + gcstate->heap_size = 0; return _PyStatus_OK(); } @@ -1232,7 +1233,7 @@ gc_collect_region(PyThreadState *tstate, struct gc_collection_stats *stats); static inline Py_ssize_t -gc_list_set_space(PyGC_Head *list, uintptr_t space) +gc_list_set_space(PyGC_Head *list, int space) { Py_ssize_t size = 0; PyGC_Head *gc; @@ -1258,9 +1259,9 @@ gc_list_set_space(PyGC_Head *list, uintptr_t space) * N == 1.4 (1 + 4/threshold) */ -/* Multiply by 4 so that the default incremental threshold of 10 - * scans objects at 20% the rate of object creation */ -#define SCAN_RATE_MULTIPLIER 2 +/* Divide by 10, so that the default incremental threshold of 10 + * scans objects at 1% of the heap size */ +#define SCAN_RATE_DIVISOR 10 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1313,7 +1314,7 @@ gc_collect_young(PyThreadState *tstate, if (scale_factor < 1) { scale_factor = 1; } - gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); } @@ -1384,12 +1385,12 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat static void completed_cycle(GCState *gcstate) { +#ifdef Py_DEBUG PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; 
assert(gc_list_is_empty(not_visited)); +#endif gcstate->visited_space = flip_old_space(gcstate->visited_space); - if (gcstate->work_to_do > 0) { - gcstate->work_to_do = 0; - } + gcstate->work_to_do = 0; } static void @@ -1404,13 +1405,13 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 1) { scale_factor = 1; } - Py_ssize_t increment_size = 0; gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; if (gcstate->visited_space) { /* objects in visited space have bit set, so we set it here */ gc_list_set_space(&increment, 1); } + Py_ssize_t increment_size = 0; while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1425,14 +1426,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); - Py_ssize_t survivor_count = gc_list_size(&survivors); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - if (gcstate->work_to_do < 0) { - gcstate->work_to_do = 0; - } + validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { @@ -1678,7 +1676,7 @@ _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) } PyObject * -_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +_PyGC_GetObjects(PyInterpreterState *interp, int generation) { assert(generation >= -1 && generation < NUM_GENERATIONS); GCState *gcstate = &interp->gc; @@ -1974,6 +1972,7 @@ _PyObject_GC_Link(PyObject *op) gc->_gc_next = 0; gc->_gc_prev = 0; gcstate->young.count++; /* number of allocated GC objects */ + gcstate->heap_size++; if (gcstate->young.count > gcstate->young.threshold 
&& gcstate->enabled && gcstate->young.threshold && @@ -2095,6 +2094,7 @@ PyObject_GC_Del(void *op) if (gcstate->young.count > 0) { gcstate->young.count--; } + gcstate->heap_size--; PyObject_Free(((char *)op)-presize); } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 52c79c02099b53..69ce22a1e83b62 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1305,7 +1305,7 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, } PyObject * -_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +_PyGC_GetObjects(PyInterpreterState *interp, int generation) { PyObject *result = PyList_New(0); if (!result) { From 72eea512b88f8fd68b7258242c37da963ad87360 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 22 Mar 2024 19:14:09 +0000 Subject: [PATCH 26/39] GH-106747: Document another difference between `glob` and `pathlib`. (#116518) Document that `path.glob()` might return *path*, whereas `glob.glob(root_dir=path)` will never return an empty string corresponding to *path*. --- Doc/library/pathlib.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 3ff2631d73c0b2..4fba3622b073a7 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1682,6 +1682,10 @@ The patterns accepted and results generated by :meth:`Path.glob` and 5. The values returned from pathlib's ``path.glob()`` and ``path.rglob()`` include the *path* as a prefix, unlike the results of ``glob.glob(root_dir=path)``. +6. The values returned from pathlib's ``path.glob()`` and ``path.rglob()`` + may include *path* itself, for example when globbing "``**``", whereas the + results of ``glob.glob(root_dir=path)`` never include an empty string that + would correspond to *path*. 
Comparison to the :mod:`os` and :mod:`os.path` modules From 9967b568edd2e35b0415c14c7242f3ca2c0dc03d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 23 Mar 2024 13:01:20 +0100 Subject: [PATCH 27/39] gh-117008: Fix functools test_recursive_pickle() (#117009) Use support.infinite_recursion() in test_recursive_pickle() of test_functools to prevent a stack overflow on "ARM64 Windows Non-Debug" buildbot. Lower Py_C_RECURSION_LIMIT to 1,000 frames on Windows ARM64. --- Include/cpython/pystate.h | 2 ++ Lib/test/test_functools.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index f1540fde925921..7fb6b176392173 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -209,6 +209,8 @@ struct _ts { # define Py_C_RECURSION_LIMIT 500 #elif defined(__s390x__) # define Py_C_RECURSION_LIMIT 800 +#elif defined(_WIN32) && defined(_M_ARM64) +# define Py_C_RECURSION_LIMIT 1000 #elif defined(_WIN32) # define Py_C_RECURSION_LIMIT 3000 #elif defined(__ANDROID__) diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index 1a6d8afe6ed6fe..3ba4929dd1b133 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -334,8 +334,10 @@ def test_recursive_pickle(self): f.__setstate__((f, (), {}, {})) try: for proto in range(pickle.HIGHEST_PROTOCOL + 1): - with self.assertRaises(RecursionError): - pickle.dumps(f, proto) + # gh-117008: Small limit since pickle uses C stack memory + with support.infinite_recursion(100): + with self.assertRaises(RecursionError): + pickle.dumps(f, proto) finally: f.__setstate__((capture, (), {}, {})) From f11d0d8be8af28e1368c3c7c116218cf65ddf93e Mon Sep 17 00:00:00 2001 From: Erik Soma Date: Sat, 23 Mar 2024 11:39:35 -0400 Subject: [PATCH 28/39] gh-91227: Ignore ERROR_PORT_UNREACHABLE in proactor recvfrom() (#32011) --- Lib/asyncio/windows_events.py | 29 ++++--- Lib/test/test_asyncio/test_events.py | 74 +++++++++++++++++ 
Lib/test/test_asyncio/test_sock_lowlevel.py | 81 +++++++++++++++++++ ...4-02-24-23-03-43.gh-issue-91227.sL4zWC.rst | 1 + Modules/overlapped.c | 1 + 5 files changed, 174 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-02-24-23-03-43.gh-issue-91227.sL4zWC.rst diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py index aca063e54c73f6..bf99bc271c7acd 100644 --- a/Lib/asyncio/windows_events.py +++ b/Lib/asyncio/windows_events.py @@ -8,6 +8,7 @@ import _overlapped import _winapi import errno +from functools import partial import math import msvcrt import socket @@ -467,6 +468,18 @@ def finish_socket_func(trans, key, ov): else: raise + @classmethod + def _finish_recvfrom(cls, trans, key, ov, *, empty_result): + try: + return cls.finish_socket_func(trans, key, ov) + except OSError as exc: + # WSARecvFrom will report ERROR_PORT_UNREACHABLE when the same + # socket is used to send to an address that is not listening. + if exc.winerror == _overlapped.ERROR_PORT_UNREACHABLE: + return empty_result, None + else: + raise + def recv(self, conn, nbytes, flags=0): self._register_with_iocp(conn) ov = _overlapped.Overlapped(NULL) @@ -501,7 +514,8 @@ def recvfrom(self, conn, nbytes, flags=0): except BrokenPipeError: return self._result((b'', None)) - return self._register(ov, conn, self.finish_socket_func) + return self._register(ov, conn, partial(self._finish_recvfrom, + empty_result=b'')) def recvfrom_into(self, conn, buf, flags=0): self._register_with_iocp(conn) @@ -511,17 +525,8 @@ def recvfrom_into(self, conn, buf, flags=0): except BrokenPipeError: return self._result((0, None)) - def finish_recv(trans, key, ov): - try: - return ov.getresult() - except OSError as exc: - if exc.winerror in (_overlapped.ERROR_NETNAME_DELETED, - _overlapped.ERROR_OPERATION_ABORTED): - raise ConnectionResetError(*exc.args) - else: - raise - - return self._register(ov, conn, finish_recv) + return self._register(ov, conn, 
partial(self._finish_recvfrom, + empty_result=0)) def sendto(self, conn, buf, flags=0, addr=None): self._register_with_iocp(conn) diff --git a/Lib/test/test_asyncio/test_events.py b/Lib/test/test_asyncio/test_events.py index ae0bff06479093..88c85a36b5d448 100644 --- a/Lib/test/test_asyncio/test_events.py +++ b/Lib/test/test_asyncio/test_events.py @@ -1378,6 +1378,80 @@ def test_create_datagram_endpoint_sock(self): tr.close() self.loop.run_until_complete(pr.done) + def test_datagram_send_to_non_listening_address(self): + # see: + # https://github.com/python/cpython/issues/91227 + # https://github.com/python/cpython/issues/88906 + # https://bugs.python.org/issue47071 + # https://bugs.python.org/issue44743 + # The Proactor event loop would fail to receive datagram messages after + # sending a message to an address that wasn't listening. + loop = self.loop + + class Protocol(asyncio.DatagramProtocol): + + _received_datagram = None + + def datagram_received(self, data, addr): + self._received_datagram.set_result(data) + + async def wait_for_datagram_received(self): + self._received_datagram = loop.create_future() + result = await asyncio.wait_for(self._received_datagram, 10) + self._received_datagram = None + return result + + def create_socket(): + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + sock.setblocking(False) + sock.bind(('127.0.0.1', 0)) + return sock + + socket_1 = create_socket() + transport_1, protocol_1 = loop.run_until_complete( + loop.create_datagram_endpoint(Protocol, sock=socket_1) + ) + addr_1 = socket_1.getsockname() + + socket_2 = create_socket() + transport_2, protocol_2 = loop.run_until_complete( + loop.create_datagram_endpoint(Protocol, sock=socket_2) + ) + addr_2 = socket_2.getsockname() + + # creating and immediately closing this to try to get an address that + # is not listening + socket_3 = create_socket() + transport_3, protocol_3 = loop.run_until_complete( + loop.create_datagram_endpoint(Protocol, sock=socket_3) + ) + addr_3 = 
socket_3.getsockname() + transport_3.abort() + + transport_1.sendto(b'a', addr=addr_2) + self.assertEqual(loop.run_until_complete( + protocol_2.wait_for_datagram_received() + ), b'a') + + transport_2.sendto(b'b', addr=addr_1) + self.assertEqual(loop.run_until_complete( + protocol_1.wait_for_datagram_received() + ), b'b') + + # this should send to an address that isn't listening + transport_1.sendto(b'c', addr=addr_3) + loop.run_until_complete(asyncio.sleep(0)) + + # transport 1 should still be able to receive messages after sending to + # an address that wasn't listening + transport_2.sendto(b'd', addr=addr_1) + self.assertEqual(loop.run_until_complete( + protocol_1.wait_for_datagram_received() + ), b'd') + + transport_1.close() + transport_2.close() + def test_internal_fds(self): loop = self.create_event_loop() if not isinstance(loop, selector_events.BaseSelectorEventLoop): diff --git a/Lib/test/test_asyncio/test_sock_lowlevel.py b/Lib/test/test_asyncio/test_sock_lowlevel.py index 075113cbe8e4a6..acef24a703ba38 100644 --- a/Lib/test/test_asyncio/test_sock_lowlevel.py +++ b/Lib/test/test_asyncio/test_sock_lowlevel.py @@ -555,12 +555,93 @@ class SelectEventLoopTests(BaseSockTestsMixin, def create_event_loop(self): return asyncio.SelectorEventLoop() + class ProactorEventLoopTests(BaseSockTestsMixin, test_utils.TestCase): def create_event_loop(self): return asyncio.ProactorEventLoop() + + async def _basetest_datagram_send_to_non_listening_address(self, + recvfrom): + # see: + # https://github.com/python/cpython/issues/91227 + # https://github.com/python/cpython/issues/88906 + # https://bugs.python.org/issue47071 + # https://bugs.python.org/issue44743 + # The Proactor event loop would fail to receive datagram messages + # after sending a message to an address that wasn't listening. 
+ + def create_socket(): + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + sock.setblocking(False) + sock.bind(('127.0.0.1', 0)) + return sock + + socket_1 = create_socket() + addr_1 = socket_1.getsockname() + + socket_2 = create_socket() + addr_2 = socket_2.getsockname() + + # creating and immediately closing this to try to get an address + # that is not listening + socket_3 = create_socket() + addr_3 = socket_3.getsockname() + socket_3.shutdown(socket.SHUT_RDWR) + socket_3.close() + + socket_1_recv_task = self.loop.create_task(recvfrom(socket_1)) + socket_2_recv_task = self.loop.create_task(recvfrom(socket_2)) + await asyncio.sleep(0) + + await self.loop.sock_sendto(socket_1, b'a', addr_2) + self.assertEqual(await socket_2_recv_task, b'a') + + await self.loop.sock_sendto(socket_2, b'b', addr_1) + self.assertEqual(await socket_1_recv_task, b'b') + socket_1_recv_task = self.loop.create_task(recvfrom(socket_1)) + await asyncio.sleep(0) + + # this should send to an address that isn't listening + await self.loop.sock_sendto(socket_1, b'c', addr_3) + self.assertEqual(await socket_1_recv_task, b'') + socket_1_recv_task = self.loop.create_task(recvfrom(socket_1)) + await asyncio.sleep(0) + + # socket 1 should still be able to receive messages after sending + # to an address that wasn't listening + socket_2.sendto(b'd', addr_1) + self.assertEqual(await socket_1_recv_task, b'd') + + socket_1.shutdown(socket.SHUT_RDWR) + socket_1.close() + socket_2.shutdown(socket.SHUT_RDWR) + socket_2.close() + + + def test_datagram_send_to_non_listening_address_recvfrom(self): + async def recvfrom(socket): + data, _ = await self.loop.sock_recvfrom(socket, 4096) + return data + + self.loop.run_until_complete( + self._basetest_datagram_send_to_non_listening_address( + recvfrom)) + + + def test_datagram_send_to_non_listening_address_recvfrom_into(self): + async def recvfrom_into(socket): + buf = bytearray(4096) + length, _ = await self.loop.sock_recvfrom_into(socket, buf, + 4096) + 
return buf[:length] + + self.loop.run_until_complete( + self._basetest_datagram_send_to_non_listening_address( + recvfrom_into)) + else: import selectors diff --git a/Misc/NEWS.d/next/Windows/2024-02-24-23-03-43.gh-issue-91227.sL4zWC.rst b/Misc/NEWS.d/next/Windows/2024-02-24-23-03-43.gh-issue-91227.sL4zWC.rst new file mode 100644 index 00000000000000..8e53afdd619001 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-02-24-23-03-43.gh-issue-91227.sL4zWC.rst @@ -0,0 +1 @@ +Fix the asyncio ProactorEventLoop implementation so that sending a datagram to an address that is not listening does not prevent receiving any more datagrams. diff --git a/Modules/overlapped.c b/Modules/overlapped.c index 4c6c7531cdeab0..b9881d91ded244 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -2056,6 +2056,7 @@ overlapped_exec(PyObject *module) WINAPI_CONSTANT(F_DWORD, ERROR_OPERATION_ABORTED); WINAPI_CONSTANT(F_DWORD, ERROR_SEM_TIMEOUT); WINAPI_CONSTANT(F_DWORD, ERROR_PIPE_BUSY); + WINAPI_CONSTANT(F_DWORD, ERROR_PORT_UNREACHABLE); WINAPI_CONSTANT(F_DWORD, INFINITE); WINAPI_CONSTANT(F_HANDLE, INVALID_HANDLE_VALUE); WINAPI_CONSTANT(F_HANDLE, NULL); From 6c83352bfe78a7d567c8d76257df6eb91d5a7245 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sun, 24 Mar 2024 06:19:17 +0800 Subject: [PATCH 29/39] gh-117180: Complete call sequence when trace stack overflow (GH-117184) --------- Co-authored-by: Peter Lazorchak Co-authored-by: Guido van Rossum Co-authored-by: Guido van Rossum --- Lib/test/test_capi/test_opt.py | 26 ++++++++++++++++++++++++++ Python/optimizer.c | 1 + 2 files changed, 27 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b0859a382de523..a1dc03dd3b651b 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -955,6 +955,32 @@ def testfunc(n): _, ex = self._run_with_optimizer(testfunc, 16) self.assertIsNone(ex) + def test_many_nested(self): + # overflow the trace_stack + def dummy_a(x): + return x + 
def dummy_b(x): + return dummy_a(x) + def dummy_c(x): + return dummy_b(x) + def dummy_d(x): + return dummy_c(x) + def dummy_e(x): + return dummy_d(x) + def dummy_f(x): + return dummy_e(x) + def dummy_g(x): + return dummy_f(x) + def dummy_h(x): + return dummy_g(x) + def testfunc(n): + a = 0 + for _ in range(n): + a += dummy_h(n) + return a + + self._run_with_optimizer(testfunc, 32) + if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer.c b/Python/optimizer.c index 177ad343618c37..f8c1390a061650 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -476,6 +476,7 @@ BRANCH_TO_GUARD[4][2] = { if (trace_stack_depth >= TRACE_STACK_SIZE) { \ DPRINTF(2, "Trace stack overflow\n"); \ OPT_STAT_INC(trace_stack_overflow); \ + ADD_TO_TRACE(uop, oparg, operand, target); \ ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \ goto done; \ } \ From d610d821fd210dce63a1132c274ffdf8acc510bc Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Sat, 23 Mar 2024 22:32:33 +0000 Subject: [PATCH 30/39] gh-112383: teach dis how to interpret ENTER_EXECUTOR (#117171) --- Lib/dis.py | 29 +++++++-- Lib/test/test_capi/test_opt.py | 23 +++---- Lib/test/test_dis.py | 11 +--- ...-03-23-13-40-13.gh-issue-112383.XuHf3G.rst | 1 + Modules/_opcode.c | 23 +++++++ Modules/_testinternalcapi.c | 21 ------- Modules/clinic/_opcode.c.h | 62 ++++++++++++++++++- 7 files changed, 120 insertions(+), 50 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-03-23-13-40-13.gh-issue-112383.XuHf3G.rst diff --git a/Lib/dis.py b/Lib/dis.py index d146bcbb5097ef..111d624fc259c5 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -17,6 +17,8 @@ _specialized_opmap, ) +from _opcode import get_executor + __all__ = ["code_info", "dis", "disassemble", "distb", "disco", "findlinestarts", "findlabels", "show_code", "get_instructions", "Instruction", "Bytecode"] + _opcodes_all @@ -205,7 +207,27 @@ def _deoptop(op): return _all_opmap[deoptmap[name]] if name in deoptmap else op 
def _get_code_array(co, adaptive): - return co._co_code_adaptive if adaptive else co.co_code + if adaptive: + code = co._co_code_adaptive + res = [] + found = False + for i in range(0, len(code), 2): + op, arg = code[i], code[i+1] + if op == ENTER_EXECUTOR: + try: + ex = get_executor(co, i) + except ValueError: + ex = None + + if ex: + op, arg = ex.get_opcode(), ex.get_oparg() + found = True + + res.append(op.to_bytes()) + res.append(arg.to_bytes()) + return code if not found else b''.join(res) + else: + return co.co_code def code_info(x): """Formatted details of methods, functions, or code.""" @@ -514,8 +536,6 @@ def offset_from_jump_arg(self, op, arg, offset): argval = offset + 2 + signed_arg*2 caches = _get_cache_size(_all_opname[deop]) argval += 2 * caches - if deop == ENTER_EXECUTOR: - argval += 2 return argval return None @@ -680,8 +700,7 @@ def _parse_exception_table(code): def _is_backward_jump(op): return opname[op] in ('JUMP_BACKWARD', - 'JUMP_BACKWARD_NO_INTERRUPT', - 'ENTER_EXECUTOR') + 'JUMP_BACKWARD_NO_INTERRUPT') def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None, original_code=None, arg_resolver=None): diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index a1dc03dd3b651b..b59f4b74a8593e 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1,11 +1,11 @@ import contextlib -import opcode import sys import textwrap import unittest import gc import os +import _opcode import _testinternalcapi from test.support import script_helper, requires_specialization @@ -115,13 +115,11 @@ def testfunc(x): def get_first_executor(func): code = func.__code__ co_code = code.co_code - JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"] for i in range(0, len(co_code), 2): - if co_code[i] == JUMP_BACKWARD: - try: - return _testinternalcapi.get_executor(code, i) - except ValueError: - pass + try: + return _opcode.get_executor(code, i) + except ValueError: + pass return None @@ -760,17 
+758,16 @@ def test_promote_globals_to_constants(self): result = script_helper.run_python_until_end('-c', textwrap.dedent(""" import _testinternalcapi import opcode + import _opcode def get_first_executor(func): code = func.__code__ co_code = code.co_code - JUMP_BACKWARD = opcode.opmap["JUMP_BACKWARD"] for i in range(0, len(co_code), 2): - if co_code[i] == JUMP_BACKWARD: - try: - return _testinternalcapi.get_executor(code, i) - except ValueError: - pass + try: + return _opcode.get_executor(code, i) + except ValueError: + pass return None def get_opnames(ex): diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index a93cb509b651c5..747a73829fa705 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1201,19 +1201,10 @@ def test_call_specialize(self): @cpython_only @requires_specialization def test_loop_quicken(self): - import _testinternalcapi # Loop can trigger a quicken where the loop is located - self.code_quicken(loop_test, 1) + self.code_quicken(loop_test, 4) got = self.get_disassembly(loop_test, adaptive=True) expected = dis_loop_test_quickened_code - if _testinternalcapi.get_optimizer(): - # We *may* see ENTER_EXECUTOR in the disassembly. This is a - # temporary hack to keep the test working until dis is able to - # handle the instruction correctly (GH-112383): - got = got.replace( - "ENTER_EXECUTOR 16", - "JUMP_BACKWARD 16 (to L1)", - ) self.do_disassembly_compare(got, expected) @cpython_only diff --git a/Misc/NEWS.d/next/Library/2024-03-23-13-40-13.gh-issue-112383.XuHf3G.rst b/Misc/NEWS.d/next/Library/2024-03-23-13-40-13.gh-issue-112383.XuHf3G.rst new file mode 100644 index 00000000000000..931e615c2b86c5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-23-13-40-13.gh-issue-112383.XuHf3G.rst @@ -0,0 +1 @@ +Fix :mod:`dis` module's handling of ``ENTER_EXECUTOR`` instructions. 
diff --git a/Modules/_opcode.c b/Modules/_opcode.c index 93c71377f03a76..5350adb456b859 100644 --- a/Modules/_opcode.c +++ b/Modules/_opcode.c @@ -347,6 +347,28 @@ _opcode_get_intrinsic2_descs_impl(PyObject *module) return list; } +/*[clinic input] + +_opcode.get_executor + + code: object + offset: int + +Return the executor object at offset in code if exists, None otherwise. +[clinic start generated code]*/ + +static PyObject * +_opcode_get_executor_impl(PyObject *module, PyObject *code, int offset) +/*[clinic end generated code: output=c035c7a47b16648f input=85eff93ea7aac282]*/ +{ + if (!PyCode_Check(code)) { + PyErr_Format(PyExc_TypeError, + "expected a code object, not '%.100s'", + Py_TYPE(code)->tp_name); + return NULL; + } + return (PyObject *)PyUnstable_GetExecutor((PyCodeObject *)code, offset); +} static PyMethodDef opcode_functions[] = { @@ -363,6 +385,7 @@ opcode_functions[] = { _OPCODE_GET_NB_OPS_METHODDEF _OPCODE_GET_INTRINSIC1_DESCS_METHODDEF _OPCODE_GET_INTRINSIC2_DESCS_METHODDEF + _OPCODE_GET_EXECUTOR_METHODDEF {NULL, NULL, 0, NULL} }; diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index e1717f7a66b1de..c07652facc0ae2 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -991,26 +991,6 @@ get_optimizer(PyObject *self, PyObject *Py_UNUSED(ignored)) return opt; } -static PyObject * -get_executor(PyObject *self, PyObject *const *args, Py_ssize_t nargs) -{ - - if (!_PyArg_CheckPositional("get_executor", nargs, 2, 2)) { - return NULL; - } - PyObject *code = args[0]; - PyObject *offset = args[1]; - long ioffset = PyLong_AsLong(offset); - if (ioffset == -1 && PyErr_Occurred()) { - return NULL; - } - if (!PyCode_Check(code)) { - PyErr_SetString(PyExc_TypeError, "first argument must be a code object"); - return NULL; - } - return (PyObject *)PyUnstable_GetExecutor((PyCodeObject *)code, ioffset); -} - static PyObject * add_executor_dependency(PyObject *self, PyObject *args) { @@ -1836,7 +1816,6 @@ static 
PyMethodDef module_functions[] = { {"iframe_getlasti", iframe_getlasti, METH_O, NULL}, {"get_optimizer", get_optimizer, METH_NOARGS, NULL}, {"set_optimizer", set_optimizer, METH_O, NULL}, - {"get_executor", _PyCFunction_CAST(get_executor), METH_FASTCALL, NULL}, {"new_counter_optimizer", new_counter_optimizer, METH_NOARGS, NULL}, {"new_uop_optimizer", new_uop_optimizer, METH_NOARGS, NULL}, {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, diff --git a/Modules/clinic/_opcode.c.h b/Modules/clinic/_opcode.c.h index c7fd0f9f8a7420..fb90fb8e32f918 100644 --- a/Modules/clinic/_opcode.c.h +++ b/Modules/clinic/_opcode.c.h @@ -668,4 +668,64 @@ _opcode_get_intrinsic2_descs(PyObject *module, PyObject *Py_UNUSED(ignored)) { return _opcode_get_intrinsic2_descs_impl(module); } -/*[clinic end generated code: output=a1052bb1deffb7f2 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_opcode_get_executor__doc__, +"get_executor($module, /, code, offset)\n" +"--\n" +"\n" +"Return the executor object at offset in code if exists, None otherwise."); + +#define _OPCODE_GET_EXECUTOR_METHODDEF \ + {"get_executor", _PyCFunction_CAST(_opcode_get_executor), METH_FASTCALL|METH_KEYWORDS, _opcode_get_executor__doc__}, + +static PyObject * +_opcode_get_executor_impl(PyObject *module, PyObject *code, int offset); + +static PyObject * +_opcode_get_executor(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(code), &_Py_ID(offset), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = 
{"code", "offset", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "get_executor", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject *code; + int offset; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf); + if (!args) { + goto exit; + } + code = args[0]; + offset = PyLong_AsInt(args[1]); + if (offset == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = _opcode_get_executor_impl(module, code, offset); + +exit: + return return_value; +} +/*[clinic end generated code: output=2dbb31b041b49c8f input=a9049054013a1b77]*/ From a1e948edba9ec6ba61365429857f7a087c5edf51 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 24 Mar 2024 11:35:58 +0200 Subject: [PATCH 31/39] Add cumulative option for the new statistics.kde() function. (#117033) --- Doc/library/statistics.rst | 13 ++++--- Lib/statistics.py | 67 ++++++++++++++++++++++++++++--------- Lib/test/test_statistics.py | 16 ++++++++- 3 files changed, 75 insertions(+), 21 deletions(-) diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 1785c6bcc212b7..79c68123454524 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -261,11 +261,12 @@ However, for reading convenience, most of the examples show sorted sequences. Added support for *weights*. -.. function:: kde(data, h, kernel='normal') +.. function:: kde(data, h, kernel='normal', *, cumulative=False) `Kernel Density Estimation (KDE) `_: - Create a continuous probability density function from discrete samples. + Create a continuous probability density function or cumulative + distribution function from discrete samples. The basic idea is to smooth the data using `a kernel function `_. @@ -280,11 +281,13 @@ However, for reading convenience, most of the examples show sorted sequences. as much as the more influential bandwidth smoothing parameter. 
Kernels that give some weight to every sample point include - *normal* or *gauss*, *logistic*, and *sigmoid*. + *normal* (*gauss*), *logistic*, and *sigmoid*. Kernels that only give weight to sample points within the bandwidth - include *rectangular* or *uniform*, *triangular*, *parabolic* or - *epanechnikov*, *quartic* or *biweight*, *triweight*, and *cosine*. + include *rectangular* (*uniform*), *triangular*, *parabolic* + (*epanechnikov*), *quartic* (*biweight*), *triweight*, and *cosine*. + + If *cumulative* is true, will return a cumulative distribution function. A :exc:`StatisticsError` will be raised if the *data* sequence is empty. diff --git a/Lib/statistics.py b/Lib/statistics.py index 5d636258fd442b..58fb31def8896e 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -138,7 +138,7 @@ from itertools import count, groupby, repeat from bisect import bisect_left, bisect_right from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum, sumprod -from math import isfinite, isinf, pi, cos, cosh +from math import isfinite, isinf, pi, cos, sin, cosh, atan from functools import reduce from operator import itemgetter from collections import Counter, namedtuple, defaultdict @@ -803,9 +803,9 @@ def multimode(data): return [value for value, count in counts.items() if count == maxcount] -def kde(data, h, kernel='normal'): - """Kernel Density Estimation: Create a continuous probability - density function from discrete samples. +def kde(data, h, kernel='normal', *, cumulative=False): + """Kernel Density Estimation: Create a continuous probability density + function or cumulative distribution function from discrete samples. The basic idea is to smooth the data using a kernel function to help draw inferences about a population from a sample. 
@@ -820,20 +820,22 @@ def kde(data, h, kernel='normal'): Kernels that give some weight to every sample point: - normal or gauss + normal (gauss) logistic sigmoid Kernels that only give weight to sample points within the bandwidth: - rectangular or uniform + rectangular (uniform) triangular - parabolic or epanechnikov - quartic or biweight + parabolic (epanechnikov) + quartic (biweight) triweight cosine + If *cumulative* is true, will return a cumulative distribution function. + A StatisticsError will be raised if the data sequence is empty. Example @@ -847,7 +849,8 @@ def kde(data, h, kernel='normal'): Compute the area under the curve: - >>> sum(f_hat(x) for x in range(-20, 20)) + >>> area = sum(f_hat(x) for x in range(-20, 20)) + >>> round(area, 4) 1.0 Plot the estimated probability density function at @@ -876,6 +879,13 @@ def kde(data, h, kernel='normal'): 9: 0.009 x 10: 0.002 x + Estimate P(4.5 < X <= 7.5), the probability that a new sample value + will be between 4.5 and 7.5: + + >>> cdf = kde(sample, h=1.5, cumulative=True) + >>> round(cdf(7.5) - cdf(4.5), 2) + 0.22 + References ---------- @@ -888,6 +898,9 @@ def kde(data, h, kernel='normal'): Interactive graphical demonstration and exploration: https://demonstrations.wolfram.com/KernelDensityEstimation/ + Kernel estimation of cumulative distribution function of a random variable with bounded support + https://www.econstor.eu/bitstream/10419/207829/1/10.21307_stattrans-2016-037.pdf + """ n = len(data) @@ -903,45 +916,56 @@ def kde(data, h, kernel='normal'): match kernel: case 'normal' | 'gauss': - c = 1 / sqrt(2 * pi) - K = lambda t: c * exp(-1/2 * t * t) + sqrt2pi = sqrt(2 * pi) + sqrt2 = sqrt(2) + K = lambda t: exp(-1/2 * t * t) / sqrt2pi + I = lambda t: 1/2 * (1.0 + erf(t / sqrt2)) support = None case 'logistic': # 1.0 / (exp(t) + 2.0 + exp(-t)) K = lambda t: 1/2 / (1.0 + cosh(t)) + I = lambda t: 1.0 - 1.0 / (exp(t) + 1.0) support = None case 'sigmoid': # (2/pi) / (exp(t) + exp(-t)) - c = 1 / pi - K = 
lambda t: c / cosh(t) + c1 = 1 / pi + c2 = 2 / pi + K = lambda t: c1 / cosh(t) + I = lambda t: c2 * atan(exp(t)) support = None case 'rectangular' | 'uniform': K = lambda t: 1/2 + I = lambda t: 1/2 * t + 1/2 support = 1.0 case 'triangular': K = lambda t: 1.0 - abs(t) + I = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2 support = 1.0 case 'parabolic' | 'epanechnikov': K = lambda t: 3/4 * (1.0 - t * t) + I = lambda t: -1/4 * t**3 + 3/4 * t + 1/2 support = 1.0 case 'quartic' | 'biweight': K = lambda t: 15/16 * (1.0 - t * t) ** 2 + I = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2 support = 1.0 case 'triweight': K = lambda t: 35/32 * (1.0 - t * t) ** 3 + I = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2 support = 1.0 case 'cosine': c1 = pi / 4 c2 = pi / 2 K = lambda t: c1 * cos(c2 * t) + I = lambda t: 1/2 * sin(c2 * t) + 1/2 support = 1.0 case _: @@ -952,6 +976,9 @@ def kde(data, h, kernel='normal'): def pdf(x): return sum(K((x - x_i) / h) for x_i in data) / (n * h) + def cdf(x): + return sum(I((x - x_i) / h) for x_i in data) / n + else: sample = sorted(data) @@ -963,9 +990,19 @@ def pdf(x): supported = sample[i : j] return sum(K((x - x_i) / h) for x_i in supported) / (n * h) - pdf.__doc__ = f'PDF estimate with {h=!r} and {kernel=!r}' + def cdf(x): + i = bisect_left(sample, x - bandwidth) + j = bisect_right(sample, x + bandwidth) + supported = sample[i : j] + return sum((I((x - x_i) / h) for x_i in supported), i) / n - return pdf + if cumulative: + cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}' + return cdf + + else: + pdf.__doc__ = f'PDF estimate with {h=!r} and {kernel=!r}' + return pdf # Notes on methods for computing quantiles diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 1cf41638a7f01a..204787a88a9c5f 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2379,6 +2379,18 @@ def integrate(func, low, high, steps=10_000): area = integrate(f_hat, -20, 20) 
self.assertAlmostEqual(area, 1.0, places=4) + # Check CDF against an integral of the PDF + + data = [3, 5, 10, 12] + h = 2.3 + x = 10.5 + for kernel in kernels: + with self.subTest(kernel=kernel): + cdf = kde(data, h, kernel, cumulative=True) + f_hat = kde(data, h, kernel) + area = integrate(f_hat, -20, x, 100_000) + self.assertAlmostEqual(cdf(x), area, places=4) + # Check error cases with self.assertRaises(StatisticsError): @@ -2395,6 +2407,8 @@ def integrate(func, low, high, steps=10_000): kde(sample, h='str') # Wrong bandwidth type with self.assertRaises(StatisticsError): kde(sample, h=1.0, kernel='bogus') # Invalid kernel + with self.assertRaises(TypeError): + kde(sample, 1.0, 'gauss', True) # Positional cumulative argument # Test name and docstring of the generated function @@ -2403,7 +2417,7 @@ def integrate(func, low, high, steps=10_000): f_hat = kde(sample, h, kernel) self.assertEqual(f_hat.__name__, 'pdf') self.assertIn(kernel, f_hat.__doc__) - self.assertIn(str(h), f_hat.__doc__) + self.assertIn(repr(h), f_hat.__doc__) # Test closed interval for the support boundaries. # In particular, 'uniform' should non-zero at the boundaries. 
From 39df7732178c8e8f75b12f069a3dbc1715c99995 Mon Sep 17 00:00:00 2001 From: LilKS <1244886+LilKS@users.noreply.github.com> Date: Sun, 24 Mar 2024 11:01:07 +0100 Subject: [PATCH 32/39] gh-101760: Improve the imaplib.IMAP4 example (#101764) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/imaplib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/imaplib.rst b/Doc/library/imaplib.rst index d5c868def3b64f..ccfd0cd3dde109 100644 --- a/Doc/library/imaplib.rst +++ b/Doc/library/imaplib.rst @@ -622,7 +622,7 @@ retrieves and prints all messages:: import getpass, imaplib - M = imaplib.IMAP4() + M = imaplib.IMAP4(host='example.org') M.login(getpass.getuser(), getpass.getpass()) M.select() typ, data = M.search(None, 'ALL') From f267d5bf2a99fbeb26a720d1c87c1f0557424b14 Mon Sep 17 00:00:00 2001 From: Kerim Kabirov Date: Sun, 24 Mar 2024 14:59:14 +0100 Subject: [PATCH 33/39] GH-115986 Docs: promote pprint.pp usage as a default (#116614) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/pprint.rst | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/Doc/library/pprint.rst b/Doc/library/pprint.rst index 32e1351b7ffeeb..eebd270a096ba5 100644 --- a/Doc/library/pprint.rst +++ b/Doc/library/pprint.rst @@ -44,6 +44,17 @@ Functions *args* and *kwargs* will be passed to :func:`~pprint.pprint` as formatting parameters. + >>> import pprint + >>> stuff = ['spam', 'eggs', 'lumberjack', 'knights', 'ni'] + >>> stuff.insert(0, stuff) + >>> pprint.pp(stuff) + [, + 'spam', + 'eggs', + 'lumberjack', + 'knights', + 'ni'] + .. versionadded:: 3.8 @@ -61,16 +72,8 @@ Functions :class:`PrettyPrinter` constructor and their meanings are as described in its documentation below. 
- >>> import pprint - >>> stuff = ['spam', 'eggs', 'lumberjack', 'knights', 'ni'] - >>> stuff.insert(0, stuff) - >>> pprint.pprint(stuff) - [, - 'spam', - 'eggs', - 'lumberjack', - 'knights', - 'ni'] + Note that *sort_dicts* is ``True`` by default and you might want to use + :func:`~pprint.pp` instead where it is ``False`` by default. .. function:: pformat(object, indent=1, width=80, depth=None, *, \ compact=False, sort_dicts=True, underscore_numbers=False) @@ -261,7 +264,7 @@ are converted to strings. The default implementation uses the internals of the Example ------- -To demonstrate several uses of the :func:`~pprint.pprint` function and its parameters, +To demonstrate several uses of the :func:`~pprint.pp` function and its parameters, let's fetch information about a project from `PyPI `_:: >>> import json @@ -270,9 +273,9 @@ let's fetch information about a project from `PyPI `_:: >>> with urlopen('https://pypi.org/pypi/sampleproject/json') as resp: ... project_info = json.load(resp)['info'] -In its basic form, :func:`~pprint.pprint` shows the whole object:: +In its basic form, :func:`~pprint.pp` shows the whole object:: - >>> pprint.pprint(project_info) + >>> pprint.pp(project_info) {'author': 'The Python Packaging Authority', 'author_email': 'pypa-dev@googlegroups.com', 'bugtrack_url': None, @@ -329,7 +332,7 @@ In its basic form, :func:`~pprint.pprint` shows the whole object:: The result can be limited to a certain *depth* (ellipsis is used for deeper contents):: - >>> pprint.pprint(project_info, depth=1) + >>> pprint.pp(project_info, depth=1) {'author': 'The Python Packaging Authority', 'author_email': 'pypa-dev@googlegroups.com', 'bugtrack_url': None, @@ -375,7 +378,7 @@ contents):: Additionally, maximum character *width* can be suggested. 
If a long object cannot be split, the specified width will be exceeded:: - >>> pprint.pprint(project_info, depth=1, width=60) + >>> pprint.pp(project_info, depth=1, width=60) {'author': 'The Python Packaging Authority', 'author_email': 'pypa-dev@googlegroups.com', 'bugtrack_url': None, From 78a651fd7fbe7a3d1702e40f4cbfa72d87241ef0 Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Sun, 24 Mar 2024 11:38:34 -0400 Subject: [PATCH 34/39] gh-117194: Properly format 'base64' header in What's New (#117198) It needs 6, not 3, '-'s. --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index c9a93d58056747..aba672dcb3f456 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -284,7 +284,7 @@ asyncio (Contributed by Pierre Ossman in :gh:`113538`.) base64 ---- +------ * Add :func:`base64.z85encode` and :func:`base64.z85decode` functions which allow encoding and decoding z85 data. From 83485a095363dad6c97b19af2826ca0c34343bfc Mon Sep 17 00:00:00 2001 From: Totally a booplicate <53382877+Booplicate@users.noreply.github.com> Date: Sun, 24 Mar 2024 18:48:40 +0300 Subject: [PATCH 35/39] gh-112571: Move fish venv activation script into the common folder (GH-117169) pythongh-112571: allow using fish venv activation script on windows The fish shell can be used on windows under cygwin or msys2. This change moves the script to the common folder so the venv module will install it on both posix and nt systems (like the bash script). 
--- Lib/venv/scripts/{posix => common}/activate.fish | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Lib/venv/scripts/{posix => common}/activate.fish (100%) diff --git a/Lib/venv/scripts/posix/activate.fish b/Lib/venv/scripts/common/activate.fish similarity index 100% rename from Lib/venv/scripts/posix/activate.fish rename to Lib/venv/scripts/common/activate.fish From eebea7e515462b503632ada74923ec3246599c9c Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 24 Mar 2024 20:34:55 +0200 Subject: [PATCH 36/39] gh-117176: Fix compiler warning in Python/optimizer_bytecodes.c (GH-117199) --- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e761b1b3433f04..e38428af108893 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -545,7 +545,7 @@ dummy_func(void) { PyCodeObject *co = NULL; assert((this_instr + 2)->opcode == _PUSH_FRAME); - uintptr_t push_operand = (this_instr + 2)->operand; + uint64_t push_operand = (this_instr + 2)->operand; if (push_operand & 1) { co = (PyCodeObject *)(push_operand & ~1); DPRINTF(3, "code=%p ", co); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 2908a26c20973e..6aeea51e62584f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1598,7 +1598,7 @@ (void)callable; PyCodeObject *co = NULL; assert((this_instr + 2)->opcode == _PUSH_FRAME); - uintptr_t push_operand = (this_instr + 2)->operand; + uint64_t push_operand = (this_instr + 2)->operand; if (push_operand & 1) { co = (PyCodeObject *)(push_operand & ~1); DPRINTF(3, "code=%p ", co); From 9db2a8f914ad59019d448cecc43b6d45f46424a0 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 25 Mar 2024 09:26:42 -0500 Subject: [PATCH 37/39] Minor markup and grammar fixes in the statistics docs (gh-117216) --- Doc/library/statistics.rst | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 79c68123454524..8cd43c2d6305d8 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -1000,8 +1000,8 @@ of applications in statistics. .. versionadded:: 3.8 -:class:`NormalDist` Examples and Recipes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Examples and Recipes +-------------------- Classic probability problems @@ -1036,7 +1036,7 @@ Find the `quartiles `_ and `deciles Monte Carlo inputs for simulations ********************************** -To estimate the distribution for a model than isn't easy to solve +To estimate the distribution for a model that isn't easy to solve analytically, :class:`NormalDist` can generate input samples for a `Monte Carlo simulation `_: From 01e7405da400e8997f8964d06cc414045e144681 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Mon, 25 Mar 2024 08:18:09 -0700 Subject: [PATCH 38/39] gh-112948: Make pdb completion similar to repl completion (#112950) --- Lib/pdb.py | 43 ++++++++++------ Lib/test/test_pdb.py | 51 +++++++++++++++++++ ...-12-11-00-51-51.gh-issue-112948.k-OKp5.rst | 1 + 3 files changed, 80 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-11-00-51-51.gh-issue-112948.k-OKp5.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 88ea900e63f42b..f8f42ddcdb2b20 100755 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -87,6 +87,7 @@ import linecache from contextlib import contextmanager +from rlcompleter import Completer from typing import Union @@ -573,20 +574,14 @@ def displayhook(self, obj): self.message(repr(obj)) @contextmanager - def _disable_tab_completion(self): - if self.use_rawinput and self.completekey == 'tab': - try: - import readline - except ImportError: - yield - return - try: - readline.parse_and_bind('tab: self-insert') - yield - finally: - readline.parse_and_bind('tab: complete') - else: + def _disable_command_completion(self): + completenames = self.completenames + 
try: + self.completenames = self.completedefault yield + finally: + self.completenames = completenames + return def default(self, line): if line[:1] == '!': line = line[1:].strip() @@ -595,7 +590,7 @@ def default(self, line): try: if (code := codeop.compile_command(line + '\n', '', 'single')) is None: # Multi-line mode - with self._disable_tab_completion(): + with self._disable_command_completion(): buffer = line continue_prompt = "... " while (code := codeop.compile_command(buffer, '', 'single')) is None: @@ -771,7 +766,10 @@ def completenames(self, text, line, begidx, endidx): if commands: return commands else: - return self._complete_expression(text, line, begidx, endidx) + expressions = self._complete_expression(text, line, begidx, endidx) + if expressions: + return expressions + return self.completedefault(text, line, begidx, endidx) def _complete_location(self, text, line, begidx, endidx): # Complete a file/module/function location for break/tbreak/clear. @@ -828,6 +826,21 @@ def _complete_expression(self, text, line, begidx, endidx): # Complete a simple name. 
return [n for n in ns.keys() if n.startswith(text)] + def completedefault(self, text, line, begidx, endidx): + if text.startswith("$"): + # Complete convenience variables + conv_vars = self.curframe.f_globals.get('__pdb_convenience_variables', {}) + return [f"${name}" for name in conv_vars if name.startswith(text[1:])] + + # Use rlcompleter to do the completion + state = 0 + matches = [] + completer = Completer(self.curframe.f_globals | self.curframe_locals) + while (match := completer.complete(text, state)) is not None: + matches.append(match) + state += 1 + return matches + # Command definitions, called by cmdloop() # The argument is the remaining string on the command line # Return true to exit from the command loop diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 69691e930562bc..9ee994e3fe309d 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -3567,6 +3567,57 @@ def test_expression_completion(self): self.assertIn(b'species', output) self.assertIn(b'$_frame', output) + def test_builtin_completion(self): + script = textwrap.dedent(""" + value = "speci" + import pdb; pdb.Pdb().set_trace() + """) + + # Complete: print(value + 'al') + input = b"pri\tval\t + 'al')\n" + + # Continue + input += b"c\n" + + output = run_pty(script, input) + + self.assertIn(b'special', output) + + def test_local_namespace(self): + script = textwrap.dedent(""" + def f(): + original = "I live Pythin" + import pdb; pdb.Pdb().set_trace() + f() + """) + + # Complete: original.replace('i', 'o') + input = b"orig\t.repl\t('i', 'o')\n" + + # Continue + input += b"c\n" + + output = run_pty(script, input) + + self.assertIn(b'I love Python', output) + + def test_multiline_completion(self): + script = textwrap.dedent(""" + import pdb; pdb.Pdb().set_trace() + """) + + input = b"def func():\n" + # Complete: \treturn 40 + 2 + input += b"\tret\t 40 + 2\n" + input += b"\n" + # Complete: func() + input += b"fun\t()\n" + input += b"c\n" + + output = run_pty(script, input) + + 
self.assertIn(b'42', output) + def load_tests(loader, tests, pattern): from test import test_pdb diff --git a/Misc/NEWS.d/next/Library/2023-12-11-00-51-51.gh-issue-112948.k-OKp5.rst b/Misc/NEWS.d/next/Library/2023-12-11-00-51-51.gh-issue-112948.k-OKp5.rst new file mode 100644 index 00000000000000..0925a7caba6f07 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-11-00-51-51.gh-issue-112948.k-OKp5.rst @@ -0,0 +1 @@ +Make completion of :mod:`pdb` similar to Python REPL From 0c1a42cf9c8cd0d4534d5c1d58f118ce7c5c446e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 25 Mar 2024 17:32:11 +0200 Subject: [PATCH 39/39] gh-87193: Support bytes objects with refcount > 1 in _PyBytes_Resize() (GH-117160) Create a new bytes object and destroy the old one if it has refcount > 1. --- Doc/c-api/bytes.rst | 8 +-- Lib/test/test_capi/test_bytes.py | 30 +++++++++++ ...4-03-22-19-29-24.gh-issue-87193.u7O-jY.rst | 3 ++ Modules/Setup.stdlib.in | 2 +- Modules/_testcapi/bytes.c | 53 +++++++++++++++++++ Modules/_testcapi/parts.h | 1 + Modules/_testcapimodule.c | 3 ++ Objects/bytesobject.c | 41 +++++++------- Objects/fileobject.c | 8 +-- PCbuild/_testcapi.vcxproj | 1 + PCbuild/_testcapi.vcxproj.filters | 3 ++ 11 files changed, 123 insertions(+), 30 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst create mode 100644 Modules/_testcapi/bytes.c diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index 4790d3b2da4375..bca78a9c369385 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -191,10 +191,10 @@ called with a non-bytes parameter. .. c:function:: int _PyBytes_Resize(PyObject **bytes, Py_ssize_t newsize) - A way to resize a bytes object even though it is "immutable". Only use this - to build up a brand new bytes object; don't use this if the bytes may already - be known in other parts of the code. It is an error to call this function if - the refcount on the input bytes object is not one. 
Pass the address of an + Resize a bytes object. *newsize* will be the new length of the bytes object. + You can think of it as creating a new bytes object and destroying the old + one, only more efficiently. + Pass the address of an existing bytes object as an lvalue (it may be written into), and the new size desired. On success, *\*bytes* holds the resized bytes object and ``0`` is returned; the address in *\*bytes* may differ from its input value. If the diff --git a/Lib/test/test_capi/test_bytes.py b/Lib/test/test_capi/test_bytes.py index a2ba7708f8fd26..f14d5545c829e5 100644 --- a/Lib/test/test_capi/test_bytes.py +++ b/Lib/test/test_capi/test_bytes.py @@ -2,6 +2,7 @@ from test.support import import_helper _testlimitedcapi = import_helper.import_module('_testlimitedcapi') +_testcapi = import_helper.import_module('_testcapi') from _testcapi import PY_SSIZE_T_MIN, PY_SSIZE_T_MAX NULL = None @@ -217,6 +218,35 @@ def test_decodeescape(self): # CRASHES decodeescape(b'abc', NULL, -1) # CRASHES decodeescape(NULL, NULL, 1) + def test_resize(self): + """Test _PyBytes_Resize()""" + resize = _testcapi.bytes_resize + + for new in True, False: + self.assertEqual(resize(b'abc', 0, new), b'') + self.assertEqual(resize(b'abc', 1, new), b'a') + self.assertEqual(resize(b'abc', 2, new), b'ab') + self.assertEqual(resize(b'abc', 3, new), b'abc') + b = resize(b'abc', 4, new) + self.assertEqual(len(b), 4) + self.assertEqual(b[:3], b'abc') + + self.assertEqual(resize(b'a', 0, new), b'') + self.assertEqual(resize(b'a', 1, new), b'a') + b = resize(b'a', 2, new) + self.assertEqual(len(b), 2) + self.assertEqual(b[:1], b'a') + + self.assertEqual(resize(b'', 0, new), b'') + self.assertEqual(len(resize(b'', 1, new)), 1) + self.assertEqual(len(resize(b'', 2, new)), 2) + + self.assertRaises(SystemError, resize, b'abc', -1, False) + self.assertRaises(SystemError, resize, bytearray(b'abc'), 3, False) + + # CRASHES resize(NULL, 0, False) + # CRASHES resize(NULL, 3, False) + if __name__ == 
"__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst b/Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst new file mode 100644 index 00000000000000..cb921a9c7bf36e --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-03-22-19-29-24.gh-issue-87193.u7O-jY.rst @@ -0,0 +1,3 @@ +:c:func:`_PyBytes_Resize` can now be called for bytes objects with reference +count > 1, including 1-byte bytes objects. It creates a new bytes object and +destroys the old one if it has reference count > 1. diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 09d6f3b2bb7e8d..ff5c05f88d0d40 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -162,7 +162,7 @@ @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c @MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c -@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c +@MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c 
_testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c _testcapi/bytes.c @MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/sys.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c @MODULE__TESTCLINIC_LIMITED_TRUE@_testclinic_limited _testclinic_limited.c diff --git a/Modules/_testcapi/bytes.c b/Modules/_testcapi/bytes.c new file mode 100644 index 00000000000000..02294d8887abb7 --- /dev/null +++ b/Modules/_testcapi/bytes.c @@ -0,0 +1,53 @@ +#include "parts.h" +#include "util.h" + + +/* Test _PyBytes_Resize() */ +static PyObject * +bytes_resize(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *obj; + Py_ssize_t newsize; + int new; + + if (!PyArg_ParseTuple(args, "Onp", &obj, &newsize, &new)) + return NULL; + + NULLABLE(obj); + if (new) { + assert(obj != NULL); + assert(PyBytes_CheckExact(obj)); + PyObject *newobj = PyBytes_FromStringAndSize(NULL, PyBytes_Size(obj)); + if (newobj == NULL) { + return NULL; + } + memcpy(PyBytes_AsString(newobj), PyBytes_AsString(obj), PyBytes_Size(obj)); + obj = newobj; + } + else { + Py_XINCREF(obj); + } + if (_PyBytes_Resize(&obj, newsize) < 0) { + assert(obj == NULL); + } + else { + assert(obj != NULL); + } + return obj; +} + + +static PyMethodDef test_methods[] = { + {"bytes_resize", bytes_resize, METH_VARARGS}, + {NULL}, +}; + +int +_PyTestCapi_Init_Bytes(PyObject *m) +{ + if (PyModule_AddFunctions(m, test_methods) < 0) { + 
return -1; + } + + return 0; +} diff --git a/Modules/_testcapi/parts.h b/Modules/_testcapi/parts.h index f9bdd830775a75..e7c868f6bcff6e 100644 --- a/Modules/_testcapi/parts.h +++ b/Modules/_testcapi/parts.h @@ -31,6 +31,7 @@ int _PyTestCapi_Init_Vectorcall(PyObject *module); int _PyTestCapi_Init_Heaptype(PyObject *module); int _PyTestCapi_Init_Abstract(PyObject *module); +int _PyTestCapi_Init_Bytes(PyObject *module); int _PyTestCapi_Init_Unicode(PyObject *module); int _PyTestCapi_Init_GetArgs(PyObject *module); int _PyTestCapi_Init_DateTime(PyObject *module); diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 16b5e1d257eed2..3c30381be6d538 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3971,6 +3971,9 @@ PyInit__testcapi(void) if (_PyTestCapi_Init_Abstract(m) < 0) { return NULL; } + if (_PyTestCapi_Init_Bytes(m) < 0) { + return NULL; + } if (_PyTestCapi_Init_Unicode(m) < 0) { return NULL; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 26227dd251122d..256e01f54f0782 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3025,11 +3025,9 @@ PyBytes_ConcatAndDel(PyObject **pv, PyObject *w) /* The following function breaks the notion that bytes are immutable: - it changes the size of a bytes object. We get away with this only if there - is only one module referencing the object. You can also think of it + it changes the size of a bytes object. You can think of it as creating a new bytes object and destroying the old one, only - more efficiently. In any case, don't use this if the bytes object may - already be known to some other part of the code... + more efficiently. Note that if there's not enough memory to resize the bytes object, the original bytes object at *pv is deallocated, *pv is set to NULL, an "out of memory" exception is set, and -1 is returned. 
Else (on success) 0 is @@ -3045,28 +3043,40 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) PyBytesObject *sv; v = *pv; if (!PyBytes_Check(v) || newsize < 0) { - goto error; + *pv = 0; + Py_DECREF(v); + PyErr_BadInternalCall(); + return -1; } - if (Py_SIZE(v) == newsize) { + Py_ssize_t oldsize = PyBytes_GET_SIZE(v); + if (oldsize == newsize) { /* return early if newsize equals to v->ob_size */ return 0; } - if (Py_SIZE(v) == 0) { - if (newsize == 0) { - return 0; - } + if (oldsize == 0) { *pv = _PyBytes_FromSize(newsize, 0); Py_DECREF(v); return (*pv == NULL) ? -1 : 0; } - if (Py_REFCNT(v) != 1) { - goto error; - } if (newsize == 0) { *pv = bytes_get_empty(); Py_DECREF(v); return 0; } + if (Py_REFCNT(v) != 1) { + if (oldsize < newsize) { + *pv = _PyBytes_FromSize(newsize, 0); + if (*pv) { + memcpy(PyBytes_AS_STRING(*pv), PyBytes_AS_STRING(v), oldsize); + } + } + else { + *pv = PyBytes_FromStringAndSize(PyBytes_AS_STRING(v), newsize); + } + Py_DECREF(v); + return (*pv == NULL) ? -1 : 0; + } + #ifdef Py_TRACE_REFS _Py_ForgetReference(v); #endif @@ -3089,11 +3099,6 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS sv->ob_shash = -1; /* invalidate cached hash value */ _Py_COMP_DIAG_POP return 0; -error: - *pv = 0; - Py_DECREF(v); - PyErr_BadInternalCall(); - return -1; } diff --git a/Objects/fileobject.c b/Objects/fileobject.c index e30ab952dff571..bae49d367b65ee 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -80,13 +80,7 @@ PyFile_GetLine(PyObject *f, int n) "EOF when reading a line"); } else if (s[len-1] == '\n') { - if (Py_REFCNT(result) == 1) - _PyBytes_Resize(&result, len-1); - else { - PyObject *v; - v = PyBytes_FromStringAndSize(s, len-1); - Py_SETREF(result, v); - } + (void) _PyBytes_Resize(&result, len-1); } } if (n < 0 && result != NULL && PyUnicode_Check(result)) { diff --git a/PCbuild/_testcapi.vcxproj b/PCbuild/_testcapi.vcxproj index 6522cb1fcf5c63..615d73d5e003b4 100644 --- a/PCbuild/_testcapi.vcxproj +++ b/PCbuild/_testcapi.vcxproj @@ -98,6 
+98,7 @@ + diff --git a/PCbuild/_testcapi.vcxproj.filters b/PCbuild/_testcapi.vcxproj.filters index 772a9a861517ec..0c11e918556ff5 100644 --- a/PCbuild/_testcapi.vcxproj.filters +++ b/PCbuild/_testcapi.vcxproj.filters @@ -30,6 +30,9 @@ Source Files + + Source Files + Source Files