From e2713409cff5b71b1176b0e3fa63dae447548672 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 2 Dec 2024 10:38:17 +0900 Subject: [PATCH] gh-115999: Add partial free-thread specialization for BINARY_SUBSCR (gh-127227) --- Include/internal/pycore_list.h | 3 + Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_dis.py | 21 ----- Lib/test/test_opcache.py | 107 ++++++++++++++++------ Objects/listobject.c | 6 ++ Python/bytecodes.c | 10 +- Python/executor_cases.c.h | 11 +++ Python/generated_cases.c.h | 12 ++- Python/specialize.c | 25 +++-- 10 files changed, 128 insertions(+), 71 deletions(-) diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 2c666f9be4bd79..f03e484f5ef8b0 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -10,6 +10,9 @@ extern "C" { PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); +// _PyList_GetItemRef should be used only when the object is known to be a list +// because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does. 
+extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5ce172856e1b19..d63c8df8ca6690 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1952,7 +1952,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, - [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1c1f478c3833c8..1825bb3a5abc80 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -84,7 +84,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index f26411ace8fa73..55890e58ed4bae 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1260,27 +1260,6 @@ def 
test_super_instructions(self): got = self.get_disassembly(load_test, adaptive=True) self.do_disassembly_compare(got, dis_load_test_quickened_code) - @cpython_only - @requires_specialization - def test_binary_subscr_specialize(self): - binary_subscr_quicken = """\ - 0 RESUME_CHECK 0 - - 1 LOAD_NAME 0 (a) - LOAD_SMALL_INT 0 - %s - RETURN_VALUE -""" - co_list = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_list, {}, {'a': [0]})) - got = self.get_disassembly(co_list, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_LIST_INT") - - co_dict = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_dict, {}, {'a': {0: '1'}})) - got = self.get_disassembly(co_dict, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_DICT") - @cpython_only @requires_specialization def test_load_attr_specialize(self): diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 1a6eac236009c3..b989b21cd9b3a9 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -617,7 +617,7 @@ def write(items): opname = "BINARY_SUBSCR_GETITEM" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_binary_subscr_list_int(self): def get_items(): items = [] @@ -1023,7 +1023,7 @@ def write(items): opname = "STORE_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_store_subscr_list_int(self): def get_items(): items = [] @@ -1229,48 +1229,48 @@ class TestSpecializer(TestBase): @cpython_only @requires_specialization_ft def test_binary_op(self): - def f(): + def binary_op_add_int(): for _ in range(100): a, b = 1, 2 c = a + b self.assertEqual(c, 3) - f() - self.assert_specialized(f, "BINARY_OP_ADD_INT") - self.assert_no_opcode(f, "BINARY_OP") + binary_op_add_int() + self.assert_specialized(binary_op_add_int, 
"BINARY_OP_ADD_INT") + self.assert_no_opcode(binary_op_add_int, "BINARY_OP") - def g(): + def binary_op_add_unicode(): for _ in range(100): a, b = "foo", "bar" c = a + b self.assertEqual(c, "foobar") - g() - self.assert_specialized(g, "BINARY_OP_ADD_UNICODE") - self.assert_no_opcode(g, "BINARY_OP") + binary_op_add_unicode() + self.assert_specialized(binary_op_add_unicode, "BINARY_OP_ADD_UNICODE") + self.assert_no_opcode(binary_op_add_unicode, "BINARY_OP") @cpython_only @requires_specialization_ft def test_contain_op(self): - def f(): + def contains_op_dict(): for _ in range(100): a, b = 1, {1: 2, 2: 5} self.assertTrue(a in b) self.assertFalse(3 in b) - f() - self.assert_specialized(f, "CONTAINS_OP_DICT") - self.assert_no_opcode(f, "CONTAINS_OP") + contains_op_dict() + self.assert_specialized(contains_op_dict, "CONTAINS_OP_DICT") + self.assert_no_opcode(contains_op_dict, "CONTAINS_OP") - def g(): + def contains_op_set(): for _ in range(100): a, b = 1, {1, 2} self.assertTrue(a in b) self.assertFalse(3 in b) - g() - self.assert_specialized(g, "CONTAINS_OP_SET") - self.assert_no_opcode(g, "CONTAINS_OP") + contains_op_set() + self.assert_specialized(contains_op_set, "CONTAINS_OP_SET") + self.assert_no_opcode(contains_op_set, "CONTAINS_OP") @cpython_only @requires_specialization_ft @@ -1342,34 +1342,81 @@ def to_bool_str(): @cpython_only @requires_specialization_ft def test_unpack_sequence(self): - def f(): + def unpack_sequence_two_tuple(): for _ in range(100): a, b = 1, 2 self.assertEqual(a, 1) self.assertEqual(b, 2) - f() - self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE") - self.assert_no_opcode(f, "UNPACK_SEQUENCE") + unpack_sequence_two_tuple() + self.assert_specialized(unpack_sequence_two_tuple, + "UNPACK_SEQUENCE_TWO_TUPLE") + self.assert_no_opcode(unpack_sequence_two_tuple, "UNPACK_SEQUENCE") - def g(): + def unpack_sequence_tuple(): for _ in range(100): a, = 1, self.assertEqual(a, 1) - g() - self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE") - 
self.assert_no_opcode(g, "UNPACK_SEQUENCE") + unpack_sequence_tuple() + self.assert_specialized(unpack_sequence_tuple, "UNPACK_SEQUENCE_TUPLE") + self.assert_no_opcode(unpack_sequence_tuple, "UNPACK_SEQUENCE") - def x(): + def unpack_sequence_list(): for _ in range(100): a, b = [1, 2] self.assertEqual(a, 1) self.assertEqual(b, 2) - x() - self.assert_specialized(x, "UNPACK_SEQUENCE_LIST") - self.assert_no_opcode(x, "UNPACK_SEQUENCE") + unpack_sequence_list() + self.assert_specialized(unpack_sequence_list, "UNPACK_SEQUENCE_LIST") + self.assert_no_opcode(unpack_sequence_list, "UNPACK_SEQUENCE") + + @cpython_only + @requires_specialization_ft + def test_binary_subscr(self): + def binary_subscr_list_int(): + for _ in range(100): + a = [1, 2, 3] + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_list_int() + self.assert_specialized(binary_subscr_list_int, + "BINARY_SUBSCR_LIST_INT") + self.assert_no_opcode(binary_subscr_list_int, "BINARY_SUBSCR") + + def binary_subscr_tuple_int(): + for _ in range(100): + a = (1, 2, 3) + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_tuple_int() + self.assert_specialized(binary_subscr_tuple_int, + "BINARY_SUBSCR_TUPLE_INT") + self.assert_no_opcode(binary_subscr_tuple_int, "BINARY_SUBSCR") + + def binary_subscr_dict(): + for _ in range(100): + a = {1: 2, 2: 3} + self.assertEqual(a[1], 2) + self.assertEqual(a[2], 3) + + binary_subscr_dict() + self.assert_specialized(binary_subscr_dict, "BINARY_SUBSCR_DICT") + self.assert_no_opcode(binary_subscr_dict, "BINARY_SUBSCR") + + def binary_subscr_str_int(): + for _ in range(100): + a = "foobar" + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_str_int() + self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT") + self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") + if __name__ == "__main__": unittest.main() diff --git a/Objects/listobject.c 
b/Objects/listobject.c index bb0040cbe9f272..4b24f4a428e18b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -391,6 +391,12 @@ PyList_GetItemRef(PyObject *op, Py_ssize_t i) return item; } +PyObject * +_PyList_GetItemRef(PyListObject *list, Py_ssize_t i) +{ + return list_get_item_ref(list, i); +} + int PyList_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a14b32b8108be8..c07ec42ec68f8b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -704,7 +704,7 @@ dummy_func( }; specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -713,7 +713,7 @@ dummy_func( } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_BINARY_SUBSCR, (container, sub -- res)) { @@ -790,11 +790,17 @@ dummy_func( // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub)); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; +#ifdef Py_GIL_DISABLED + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + DEOPT_IF(res_o == NULL); + STAT_INC(BINARY_SUBSCR, hit); +#else DEOPT_IF(index >= PyList_GET_SIZE(list)); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); +#endif PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d46412a193332b..c91257b06cad11 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -981,6 +981,16 @@ JUMP_TO_JUMP_TARGET(); } Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef 
Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_SUBSCR, hit); + #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -989,6 +999,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c9a5132269398c..45bcc4242af9d7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -433,7 +433,7 @@ container = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -444,7 +444,7 @@ } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _BINARY_SUBSCR { @@ -577,11 +577,19 @@ // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(res_o == NULL, BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); + #else DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif 
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/specialize.c b/Python/specialize.c index 172dae7d374602..d03310de782fe7 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1717,15 +1717,15 @@ _Py_Specialize_BinarySubscr( PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); PyObject *sub = PyStackRef_AsPyObjectBorrow(sub_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[BINARY_SUBSCR] == INLINE_CACHE_ENTRIES_BINARY_SUBSCR); - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1); PyTypeObject *container_type = Py_TYPE(container); + uint8_t specialized_op; if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_LIST_INT; + specialized_op = BINARY_SUBSCR_LIST_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1738,7 +1738,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyTuple_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_TUPLE_INT; + specialized_op = BINARY_SUBSCR_TUPLE_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1751,7 +1751,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyUnicode_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_STR_INT; + specialized_op = BINARY_SUBSCR_STR_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1762,9 +1762,10 @@ _Py_Specialize_BinarySubscr( goto fail; } if (container_type == &PyDict_Type) { - instr->op.code = BINARY_SUBSCR_DICT; + specialized_op = BINARY_SUBSCR_DICT; goto success; } +#ifndef Py_GIL_DISABLED PyTypeObject *cls = Py_TYPE(container); PyObject 
*descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { @@ -1797,21 +1798,17 @@ _Py_Specialize_BinarySubscr( // struct _specialization_cache): ht->_spec_cache.getitem = descriptor; ht->_spec_cache.getitem_version = version; - instr->op.code = BINARY_SUBSCR_GETITEM; + specialized_op = BINARY_SUBSCR_GETITEM; goto success; } +#endif // Py_GIL_DISABLED SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub)); fail: - STAT_INC(BINARY_SUBSCR, failure); - assert(!PyErr_Occurred()); - instr->op.code = BINARY_SUBSCR; - cache->counter = adaptive_counter_backoff(cache->counter); + unspecialize(instr); return; success: - STAT_INC(BINARY_SUBSCR, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + specialize(instr, specialized_op); }