From bb23e4bf5ee51e2067dcbac8e5d384e7164f0d37 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Dec 2024 18:38:23 +0900 Subject: [PATCH] gh-115999: Enable BINARY_SUBSCR_GETITEM for free-threaded build --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_typeobject.h | 3 ++ Include/internal/pycore_uop_metadata.h | 4 +- Lib/test/test_opcache.py | 18 +++++++- Objects/typeobject.c | 56 +++++++++++++++++++++++ Python/bytecodes.c | 9 ++-- Python/executor_cases.c.h | 16 +++++-- Python/generated_cases.c.h | 13 ++++-- Python/specialize.c | 18 +++----- 9 files changed, 108 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 81dde66a6f26c2..1b0874b24f8bf3 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1951,7 +1951,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 7b39d07f976ee3..abb94248526556 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -278,6 +278,9 @@ typedef int (*_py_validate_type)(PyTypeObject *); // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); +extern PyObject *_PyType_GetItemFromCache(PyTypeObject *type); +extern PyObject *_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version); +extern int _PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor); #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 89fce193f40bd8..d95be4beb59d5a 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -88,8 +88,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG, - [_BINARY_SUBSCR_INIT_CALL] = 0, + [_BINARY_SUBSCR_CHECK_FUNC] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_INIT_CALL] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 50b5f365165921..cc2de3b46725bc 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -609,7 +609,7 @@ def assert_races_do_not_crash( for writer in writers: writer.join() - @requires_specialization + @requires_specialization_ft def test_binary_subscr_getitem(self): def get_items(): class C: @@ -1069,7 +1069,7 @@ def write(items): opname = "STORE_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_unpack_sequence_list(self): def get_items(): items = [] @@ -1520,6 +1520,20 @@ def binary_subscr_str_int(): self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT") self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") + def binary_subscr_getitems(): + class C: + def __init__(self, val): + self.val = val + def __getitem__(self, item): + return self.val + items = [C(i) for i in range(100)] + for i in range(100): + self.assertEqual(items[i][i], i) + + binary_subscr_getitems() + self.assert_specialized(binary_subscr_getitems, "BINARY_SUBSCR_GETITEM") + self.assert_no_opcode(binary_subscr_getitems, "BINARY_SUBSCR") + if __name__ == "__main__": unittest.main() diff --git a/Objects/typeobject.c b/Objects/typeobject.c index cc95b9857e3f2d..5d61d0d08442ce 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5692,6 +5692,62 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return can_cache; } +int +_PyType_CacheGetItemForSpecialization(PyTypeObject *type, PyObject *descriptor) +{ + int can_cache = 0; + if (!type) { + return -1; + } + BEGIN_TYPE_LOCK(); + // This pointer is invalidated by PyType_Modified (see the comment on + // struct _specialization_cache): + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + PyFunctionObject *func = (PyFunctionObject *)descriptor; + uint32_t version = _PyFunction_GetVersionForCurrentState(func); + if (!_PyFunction_IsVersionValid(version)) { + can_cache = -1; + goto end; + } + #ifdef Py_GIL_DISABLED + can_cache = _PyObject_HasDeferredRefcount(descriptor); + #else + can_cache = 0; + #endif + FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); + FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); +end: + END_TYPE_LOCK(); + return can_cache; +} + +PyObject * +_PyType_GetItemFromCache(PyTypeObject *type) +{ + PyObject *res = NULL; + BEGIN_TYPE_LOCK(); + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + res = ht->_spec_cache.getitem; + END_TYPE_LOCK(); + return res; +} + +PyObject * +_PyType_GetItemFromCacheWithVersion(PyTypeObject *type, uint32_t *version) +{ + PyObject *res = NULL; + BEGIN_TYPE_LOCK(); + PyHeapTypeObject *ht = (PyHeapTypeObject *)type; + res = ht->_spec_cache.getitem; + if (res == NULL) { + goto end; + } + *version = ht->_spec_cache.getitem_version; +end: + END_TYPE_LOCK(); + return res; +} + static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3d280941b35244..56620911486c60 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -867,11 +867,10 @@ dummy_func( op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + uint32_t cached_version; + PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); DEOPT_IF(getitem == NULL); assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); @@ -881,8 +880,8 @@ dummy_func( op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + PyObject *getitem = _PyType_GetItemFromCache(tp); + DEOPT_IF(getitem == NULL); new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 987ff2e6419669..68c8cb170b7ac2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1131,14 +1131,15 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + uint32_t cached_version; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); + stack_pointer = _PyFrame_GetStackPointer(frame); if (getitem == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; if (((PyFunctionObject *)getitem)->func_version != cached_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -1160,8 +1161,13 @@ sub = stack_pointer[-1]; container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCache(tp); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (getitem == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 33f32aba1e5145..f013e165161fe9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -517,11 +517,12 @@ container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + uint32_t cached_version; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCacheWithVersion(tp, &cached_version); + stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(getitem == NULL, BINARY_SUBSCR); assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); @@ -532,8 +533,10 @@ { sub = stack_pointer[-1]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *getitem = _PyType_GetItemFromCache(tp); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(getitem == NULL, BINARY_SUBSCR); new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; diff --git a/Python/specialize.c b/Python/specialize.c index d3fea717243847..a9c92ee655bd95 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1085,6 +1085,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_METHOD); return -1; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER); return -1; @@ -1154,6 +1155,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na if (version == 0) { return -1; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER); return -1; @@ -1761,9 +1763,7 @@ _Py_Specialize_BinarySubscr( specialized_op = BINARY_SUBSCR_DICT; goto success; } -#ifndef Py_GIL_DISABLED - PyTypeObject *cls = Py_TYPE(container); - PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); + PyObject *descriptor = _PyType_Lookup(container_type, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE); @@ -1780,24 +1780,18 @@ _Py_Specialize_BinarySubscr( SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); goto fail; } - uint32_t version = _PyFunction_GetVersionForCurrentState(func); - if (!_PyFunction_IsVersionValid(version)) { + if (_PyType_CacheGetItemForSpecialization(container_type, descriptor) < 0) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OTHER); goto fail; } - PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type; - // This pointer is invalidated by PyType_Modified (see the comment on - // struct _specialization_cache): - ht->_spec_cache.getitem = descriptor; - ht->_spec_cache.getitem_version = version; specialized_op = BINARY_SUBSCR_GETITEM; goto success; } -#endif // Py_GIL_DISABLED SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub)); fail: @@ -2606,6 +2600,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR ); + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER); goto failure; @@ -2634,6 +2629,7 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr) assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND); PyTypeObject *tp = Py_TYPE(receiver); if (tp == &PyGen_Type || tp == &PyCoro_Type) { + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(SEND, SPEC_FAIL_OTHER); goto failure;