From a6dd78131131835ef78c9f70f88ec6f8efcf6287 Mon Sep 17 00:00:00 2001
From: Donghee Na
Date: Wed, 6 Nov 2024 12:35:10 +0900
Subject: [PATCH] gh-115999: Add free-threaded specialization for CONTAINS_OP
 (gh-126450)

- The specialization logic determines the appropriate specialization using
  only the operand's type, which is safe to read non-atomically (changing it
  requires stopping the world). We are guaranteed that the type will not
  change in between when it is checked and when we specialize the bytecode
  because the types involved are immutable (you cannot assign to `__class__`
  for exact instances of `dict`, `set`, or `frozenset`). The bytecode is
  mutated atomically using helpers.

- The specialized instructions rely on the operand type not changing in
  between the `DEOPT_IF` checks and the calls to the appropriate type-specific
  helpers (e.g. `_PySet_Contains`). This is a correctness requirement in the
  default builds and there are no changes to the opcodes in the free-threaded
  builds that would invalidate this.
---
 Lib/test/test_dis.py       | 21 +++++++++++++++++++++
 Python/bytecodes.c         |  2 +-
 Python/generated_cases.c.h |  2 +-
 Python/specialize.c        | 10 ++++++----
 4 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py
index a991c67fca46bea..337ee3bbb05136b 100644
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -1335,6 +1335,27 @@ def test_call_specialize(self):
         got = self.get_disassembly(co, adaptive=True)
         self.do_disassembly_compare(got, call_quicken)

+    @cpython_only
+    @requires_specialization_ft
+    def test_contains_specialize(self):
+        contains_op_quicken = """\
+  0           RESUME_CHECK             0
+
+  1           LOAD_NAME                0 (a)
+              LOAD_NAME                1 (b)
+              %s
+              RETURN_VALUE
+"""
+        co_dict = compile('a in b', "", "eval")
+        self.code_quicken(lambda: exec(co_dict, {}, {'a': 1, 'b': {1: 5}}))
+        got = self.get_disassembly(co_dict, adaptive=True)
+        self.do_disassembly_compare(got, contains_op_quicken % "CONTAINS_OP_DICT         0 (in)")
+
+        co_set = compile('a in b', "", "eval")
+        self.code_quicken(lambda: exec(co_set, {}, {'a': 1.0, 'b': {1, 2, 3}}))
+        got = self.get_disassembly(co_set, adaptive=True)
+        self.do_disassembly_compare(got, contains_op_quicken % "CONTAINS_OP_SET          0 (in)")
+
     @cpython_only
     @requires_specialization
     def test_loop_quicken(self):
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 8c52db6ab684369..7ae0f20369641a7 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2508,7 +2508,7 @@ dummy_func(
         }

         specializing op(_SPECIALIZE_CONTAINS_OP, (counter/1, left, right -- left, right)) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _Py_Specialize_ContainsOp(right, next_instr);
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index d346875ea4455f0..03b4d2224922f0e 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -3395,7 +3395,7 @@
             right = stack_pointer[-1];
             uint16_t counter = read_u16(&this_instr[1].cache);
             (void)counter;
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 next_instr = this_instr;
                 _PyFrame_SetStackPointer(frame, stack_pointer);
diff --git a/Python/specialize.c b/Python/specialize.c
index 86cb997ca2ced3a..17e661b2bd3c769 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2747,25 +2747,27 @@
 _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr)
 {
     PyObject *value = PyStackRef_AsPyObjectBorrow(value_st);
-    assert(ENABLE_SPECIALIZATION);
+    assert(ENABLE_SPECIALIZATION_FT);
     assert(_PyOpcode_Caches[CONTAINS_OP] == INLINE_CACHE_ENTRIES_COMPARE_OP);
+    uint8_t specialized_op;
     _PyContainsOpCache *cache = (_PyContainsOpCache *)(instr + 1);
     if (PyDict_CheckExact(value)) {
-        instr->op.code = CONTAINS_OP_DICT;
+        specialized_op = CONTAINS_OP_DICT;
         goto success;
     }
     if (PySet_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
-        instr->op.code = CONTAINS_OP_SET;
+        specialized_op = CONTAINS_OP_SET;
         goto success;
     }

     SPECIALIZATION_FAIL(CONTAINS_OP, containsop_fail_kind(value));
     STAT_INC(CONTAINS_OP, failure);
-    instr->op.code = CONTAINS_OP;
+    SET_OPCODE_OR_RETURN(instr, CONTAINS_OP);
     cache->counter = adaptive_counter_backoff(cache->counter);
     return;
 success:
     STAT_INC(CONTAINS_OP, success);
+    SET_OPCODE_OR_RETURN(instr, specialized_op);
     cache->counter = adaptive_counter_cooldown();
 }
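
Illustrative note (not part of the patch): "the bytecode is mutated atomically
using helpers" in the commit message refers to the SET_OPCODE_OR_RETURN calls
added in Python/specialize.c. The standalone C sketch below only approximates
that idea under the stated assumption that a free-threaded specializer must
tolerate a concurrent writer: the opcode byte is replaced with a
compare-and-swap, and losing the race means the update is simply skipped. The
type, helper name, and opcode values are invented for illustration and are not
CPython's implementation.

    /* Sketch only -- approximates the intent of an atomic opcode-update
     * helper; not CPython's SET_OPCODE_OR_RETURN macro. */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for a bytecode unit: an opcode byte plus an oparg byte. */
    typedef struct {
        _Atomic uint8_t opcode;
        uint8_t oparg;
    } code_unit;

    enum { OP_CONTAINS = 10, OP_CONTAINS_DICT = 11 };  /* invented values */

    /* Swap in the specialized opcode only if the adaptive form is still
     * there; if another thread specialized first, report failure so the
     * caller can back off instead of tearing the write. */
    static int
    set_opcode_or_skip(code_unit *instr, uint8_t expected, uint8_t desired)
    {
        return atomic_compare_exchange_strong(&instr->opcode, &expected, desired);
    }

    int
    main(void)
    {
        code_unit instr = { .opcode = OP_CONTAINS, .oparg = 0 };
        if (set_opcode_or_skip(&instr, OP_CONTAINS, OP_CONTAINS_DICT)) {
            printf("specialized: opcode=%u\n", (unsigned)atomic_load(&instr.opcode));
        }
        else {
            printf("lost the race; leaving the instruction unchanged\n");
        }
        return 0;
    }

In a default (GIL) build a plain store of the opcode would suffice; routing
both builds through one helper keeps the specialization path identical, which
is consistent with the single SET_OPCODE_OR_RETURN call sites in the patch
above.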