From 2de048ce79e621f5ae0574095b9600fe8595f607 Mon Sep 17 00:00:00 2001 From: mpage Date: Fri, 13 Dec 2024 10:17:16 -0800 Subject: [PATCH] gh-115999: Specialize loading attributes from modules in free-threaded builds (#127711) We use the same approach that was used for specialization of LOAD_GLOBAL in free-threaded builds: _CHECK_ATTR_MODULE is renamed to _CHECK_ATTR_MODULE_PUSH_KEYS; it pushes the keys object for the following _LOAD_ATTR_MODULE_FROM_KEYS (nee _LOAD_ATTR_MODULE). This arrangement avoids having to recheck the keys version. _LOAD_ATTR_MODULE is renamed to _LOAD_ATTR_MODULE_FROM_KEYS; it loads the value from the keys object pushed by the preceding _CHECK_ATTR_MODULE_PUSH_KEYS at the cached index. --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_ids.h | 141 +++++++++++---------- Include/internal/pycore_uop_metadata.h | 16 ++- Lib/test/test_capi/test_misc.py | 47 +++++-- Lib/test/test_generated_cases.py | 74 +++++++++++ Lib/test/test_opcache.py | 2 +- Python/bytecodes.c | 59 ++++++--- Python/executor_cases.c.h | 70 ++++++++-- Python/generated_cases.c.h | 33 +++-- Python/optimizer_analysis.c | 2 +- Python/optimizer_bytecodes.c | 10 +- Python/optimizer_cases.c.h | 47 +++++-- Python/specialize.c | 92 ++++++++------ Tools/cases_generator/generators_common.py | 15 +++ Tools/cases_generator/stack.py | 4 + 15 files changed, 437 insertions(+), 179 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 81dde66a6f26c2..28aa1120414337 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1492,7 +1492,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { return 0; } case LOAD_ATTR_MODULE: { - *effect = Py_MAX(0, (oparg & 1)); + *effect = Py_MAX(1, (oparg & 1)); return 0; } case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { @@ -2271,7 +2271,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_ATTR_METHOD_LAZY_DICT] = { 
.nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_METHOD_LAZY_DICT, 1, 3 }, { _LOAD_ATTR_METHOD_LAZY_DICT, 4, 5 } } }, [LOAD_ATTR_METHOD_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_METHOD_NO_DICT, 4, 5 } } }, [LOAD_ATTR_METHOD_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_METHOD_WITH_VALUES, 4, 5 } } }, - [LOAD_ATTR_MODULE] = { .nuops = 2, .uops = { { _CHECK_ATTR_MODULE, 2, 1 }, { _LOAD_ATTR_MODULE, 1, 3 } } }, + [LOAD_ATTR_MODULE] = { .nuops = 2, .uops = { { _CHECK_ATTR_MODULE_PUSH_KEYS, 2, 1 }, { _LOAD_ATTR_MODULE_FROM_KEYS, 1, 3 } } }, [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_NONDESCRIPTOR_NO_DICT, 4, 5 } } }, [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, 4, 5 } } }, [LOAD_ATTR_PROPERTY] = { .nuops = 5, .uops = { { _CHECK_PEP_523, 0, 0 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_PROPERTY_FRAME, 4, 5 }, { _SAVE_RETURN_OFFSET, 7, 9 }, { _PUSH_FRAME, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index fab4ce6a25b347..45563585dd5681 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -55,7 +55,7 @@ extern "C" { #define _CHECK_AND_ALLOCATE_OBJECT 327 #define _CHECK_ATTR_CLASS 328 #define _CHECK_ATTR_METHOD_LAZY_DICT 329 -#define _CHECK_ATTR_MODULE 330 +#define _CHECK_ATTR_MODULE_PUSH_KEYS 330 #define _CHECK_ATTR_WITH_HINT 331 #define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 332 #define _CHECK_EG_MATCH CHECK_EG_MATCH @@ -186,115 +186,116 @@ extern "C" { #define _LOAD_ATTR_METHOD_NO_DICT 416 #define _LOAD_ATTR_METHOD_WITH_VALUES 417 #define _LOAD_ATTR_MODULE 418 -#define 
_LOAD_ATTR_NONDESCRIPTOR_NO_DICT 419 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 420 -#define _LOAD_ATTR_PROPERTY_FRAME 421 -#define _LOAD_ATTR_SLOT 422 -#define _LOAD_ATTR_SLOT_0 423 -#define _LOAD_ATTR_SLOT_1 424 -#define _LOAD_ATTR_WITH_HINT 425 +#define _LOAD_ATTR_MODULE_FROM_KEYS 419 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 420 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 421 +#define _LOAD_ATTR_PROPERTY_FRAME 422 +#define _LOAD_ATTR_SLOT 423 +#define _LOAD_ATTR_SLOT_0 424 +#define _LOAD_ATTR_SLOT_1 425 +#define _LOAD_ATTR_WITH_HINT 426 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 426 +#define _LOAD_BYTECODE 427 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 427 -#define _LOAD_CONST_INLINE_BORROW 428 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 429 -#define _LOAD_CONST_INLINE_WITH_NULL 430 +#define _LOAD_CONST_INLINE 428 +#define _LOAD_CONST_INLINE_BORROW 429 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 430 +#define _LOAD_CONST_INLINE_WITH_NULL 431 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 431 -#define _LOAD_FAST_0 432 -#define _LOAD_FAST_1 433 -#define _LOAD_FAST_2 434 -#define _LOAD_FAST_3 435 -#define _LOAD_FAST_4 436 -#define _LOAD_FAST_5 437 -#define _LOAD_FAST_6 438 -#define _LOAD_FAST_7 439 +#define _LOAD_FAST 432 +#define _LOAD_FAST_0 433 +#define _LOAD_FAST_1 434 +#define _LOAD_FAST_2 435 +#define _LOAD_FAST_3 436 +#define _LOAD_FAST_4 437 +#define _LOAD_FAST_5 438 +#define _LOAD_FAST_6 439 +#define _LOAD_FAST_7 440 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 440 -#define _LOAD_GLOBAL_BUILTINS 441 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 442 -#define _LOAD_GLOBAL_MODULE 443 
-#define _LOAD_GLOBAL_MODULE_FROM_KEYS 444 +#define _LOAD_GLOBAL 441 +#define _LOAD_GLOBAL_BUILTINS 442 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 443 +#define _LOAD_GLOBAL_MODULE 444 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 445 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 445 -#define _LOAD_SMALL_INT_0 446 -#define _LOAD_SMALL_INT_1 447 -#define _LOAD_SMALL_INT_2 448 -#define _LOAD_SMALL_INT_3 449 +#define _LOAD_SMALL_INT 446 +#define _LOAD_SMALL_INT_0 447 +#define _LOAD_SMALL_INT_1 448 +#define _LOAD_SMALL_INT_2 449 +#define _LOAD_SMALL_INT_3 450 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 450 +#define _MAKE_CALLARGS_A_TUPLE 451 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 451 +#define _MAKE_WARM 452 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 452 -#define _MAYBE_EXPAND_METHOD_KW 453 -#define _MONITOR_CALL 454 -#define _MONITOR_JUMP_BACKWARD 455 -#define _MONITOR_RESUME 456 +#define _MAYBE_EXPAND_METHOD 453 +#define _MAYBE_EXPAND_METHOD_KW 454 +#define _MONITOR_CALL 455 +#define _MONITOR_JUMP_BACKWARD 456 +#define _MONITOR_RESUME 457 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 457 -#define _POP_JUMP_IF_TRUE 458 +#define _POP_JUMP_IF_FALSE 458 +#define _POP_JUMP_IF_TRUE 459 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 459 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 460 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 460 +#define _PUSH_FRAME 461 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 461 -#define _PY_FRAME_KW 462 -#define _QUICKEN_RESUME 463 -#define _REPLACE_WITH_TRUE 464 +#define _PY_FRAME_GENERAL 462 +#define _PY_FRAME_KW 463 
+#define _QUICKEN_RESUME 464 +#define _REPLACE_WITH_TRUE 465 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 465 -#define _SEND 466 -#define _SEND_GEN_FRAME 467 +#define _SAVE_RETURN_OFFSET 466 +#define _SEND 467 +#define _SEND_GEN_FRAME 468 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 468 -#define _STORE_ATTR 469 -#define _STORE_ATTR_INSTANCE_VALUE 470 -#define _STORE_ATTR_SLOT 471 -#define _STORE_ATTR_WITH_HINT 472 +#define _START_EXECUTOR 469 +#define _STORE_ATTR 470 +#define _STORE_ATTR_INSTANCE_VALUE 471 +#define _STORE_ATTR_SLOT 472 +#define _STORE_ATTR_WITH_HINT 473 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 473 -#define _STORE_FAST_0 474 -#define _STORE_FAST_1 475 -#define _STORE_FAST_2 476 -#define _STORE_FAST_3 477 -#define _STORE_FAST_4 478 -#define _STORE_FAST_5 479 -#define _STORE_FAST_6 480 -#define _STORE_FAST_7 481 +#define _STORE_FAST 474 +#define _STORE_FAST_0 475 +#define _STORE_FAST_1 476 +#define _STORE_FAST_2 477 +#define _STORE_FAST_3 478 +#define _STORE_FAST_4 479 +#define _STORE_FAST_5 480 +#define _STORE_FAST_6 481 +#define _STORE_FAST_7 482 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 482 -#define _STORE_SUBSCR 483 +#define _STORE_SLICE 483 +#define _STORE_SUBSCR 484 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 484 -#define _TO_BOOL 485 +#define _TIER2_RESUME_CHECK 485 +#define _TO_BOOL 486 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -304,13 +305,13 @@ extern "C" { #define _UNARY_NEGATIVE 
UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 486 +#define _UNPACK_SEQUENCE 487 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 486 +#define MAX_UOP_ID 487 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 89fce193f40bd8..dd775d3f7d3cdd 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -152,8 +152,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_ATTR_INSTANCE_VALUE_0] = HAS_DEOPT_FLAG, [_LOAD_ATTR_INSTANCE_VALUE_1] = HAS_DEOPT_FLAG, [_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_OPARG_AND_1_FLAG, - [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG, - [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_CHECK_ATTR_MODULE_PUSH_KEYS] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CHECK_ATTR_WITH_HINT] = HAS_EXIT_FLAG, [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG, [_LOAD_ATTR_SLOT_0] = HAS_DEOPT_FLAG, @@ -283,6 +283,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_LOAD_GLOBAL_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_LOAD_GLOBAL_BUILTINS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, @@ -346,7 +347,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_AND_ALLOCATE_OBJECT] = "_CHECK_AND_ALLOCATE_OBJECT", [_CHECK_ATTR_CLASS] = "_CHECK_ATTR_CLASS", [_CHECK_ATTR_METHOD_LAZY_DICT] = "_CHECK_ATTR_METHOD_LAZY_DICT", - [_CHECK_ATTR_MODULE] = "_CHECK_ATTR_MODULE", + [_CHECK_ATTR_MODULE_PUSH_KEYS] = 
"_CHECK_ATTR_MODULE_PUSH_KEYS", [_CHECK_ATTR_WITH_HINT] = "_CHECK_ATTR_WITH_HINT", [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = "_CHECK_CALL_BOUND_METHOD_EXACT_ARGS", [_CHECK_EG_MATCH] = "_CHECK_EG_MATCH", @@ -459,6 +460,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_METHOD_NO_DICT] = "_LOAD_ATTR_METHOD_NO_DICT", [_LOAD_ATTR_METHOD_WITH_VALUES] = "_LOAD_ATTR_METHOD_WITH_VALUES", [_LOAD_ATTR_MODULE] = "_LOAD_ATTR_MODULE", + [_LOAD_ATTR_MODULE_FROM_KEYS] = "_LOAD_ATTR_MODULE_FROM_KEYS", [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = "_LOAD_ATTR_NONDESCRIPTOR_NO_DICT", [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = "_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", [_LOAD_ATTR_PROPERTY_FRAME] = "_LOAD_ATTR_PROPERTY_FRAME", @@ -845,10 +847,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _LOAD_ATTR_INSTANCE_VALUE: return 1; - case _CHECK_ATTR_MODULE: + case _CHECK_ATTR_MODULE_PUSH_KEYS: return 0; - case _LOAD_ATTR_MODULE: - return 1; + case _LOAD_ATTR_MODULE_FROM_KEYS: + return 2; case _CHECK_ATTR_WITH_HINT: return 0; case _LOAD_ATTR_WITH_HINT: @@ -1107,6 +1109,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_GLOBAL_BUILTINS: return 0; + case _LOAD_ATTR_MODULE: + return 1; case _INTERNAL_INCREMENT_OPT_COUNTER: return 1; case _DYNAMIC_EXIT: diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 61512e610f46f2..ada30181aeeca9 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -48,6 +48,8 @@ # Skip this test if the _testcapi module isn't available. 
_testcapi = import_helper.import_module('_testcapi') +from _testcapi import HeapCTypeSubclass, HeapCTypeSubclassWithFinalizer + import _testlimitedcapi import _testinternalcapi @@ -653,9 +655,9 @@ def test_c_subclass_of_heap_ctype_with_tpdealloc_decrefs_once(self): self.assertEqual(type_refcnt - 1, sys.getrefcount(_testcapi.HeapCTypeSubclass)) def test_c_subclass_of_heap_ctype_with_del_modifying_dunder_class_only_decrefs_once(self): - subclass_instance = _testcapi.HeapCTypeSubclassWithFinalizer() - type_refcnt = sys.getrefcount(_testcapi.HeapCTypeSubclassWithFinalizer) - new_type_refcnt = sys.getrefcount(_testcapi.HeapCTypeSubclass) + subclass_instance = HeapCTypeSubclassWithFinalizer() + type_refcnt = sys.getrefcount(HeapCTypeSubclassWithFinalizer) + new_type_refcnt = sys.getrefcount(HeapCTypeSubclass) # Test that subclass instance was fully created self.assertEqual(subclass_instance.value, 10) @@ -665,19 +667,46 @@ def test_c_subclass_of_heap_ctype_with_del_modifying_dunder_class_only_decrefs_o del subclass_instance # Test that setting __class__ modified the reference counts of the types + # + # This is highly sensitive to implementation details and may break in the future. + # + # We expect the refcount on the old type, HeapCTypeSubclassWithFinalizer, to + # remain the same: the finalizer gets a strong reference (+1) when it gets the + # type from the module and setting __class__ decrements the refcount (-1). + # + # We expect the refcount on the new type, HeapCTypeSubclass, to increase by 2: + # the finalizer gets a strong reference (+1) when it gets the type from the + # module and setting __class__ increments the refcount (+1). + expected_type_refcnt = type_refcnt + expected_new_type_refcnt = new_type_refcnt + 2 + + if not Py_GIL_DISABLED: + # In default builds the result returned from sys.getrefcount + # includes a temporary reference that is created by the interpreter + # when it pushes its argument on the operand stack.
This temporary + # reference is not included in the result returned by Py_REFCNT, which + # is used in the finalizer. + # + # In free-threaded builds the result returned from sys.getrefcount + # does not include the temporary reference. Types use deferred + # refcounting and the interpreter will not create a new reference + # for deferred values on the operand stack. + expected_type_refcnt -= 1 + expected_new_type_refcnt -= 1 + if support.Py_DEBUG: # gh-89373: In debug mode, _Py_Dealloc() keeps a strong reference # to the type while calling tp_dealloc() - self.assertEqual(type_refcnt, _testcapi.HeapCTypeSubclassWithFinalizer.refcnt_in_del) - else: - self.assertEqual(type_refcnt - 1, _testcapi.HeapCTypeSubclassWithFinalizer.refcnt_in_del) - self.assertEqual(new_type_refcnt + 1, _testcapi.HeapCTypeSubclass.refcnt_in_del) + expected_type_refcnt += 1 + + self.assertEqual(expected_type_refcnt, HeapCTypeSubclassWithFinalizer.refcnt_in_del) + self.assertEqual(expected_new_type_refcnt, HeapCTypeSubclass.refcnt_in_del) # Test that the original type already has decreased its refcnt - self.assertEqual(type_refcnt - 1, sys.getrefcount(_testcapi.HeapCTypeSubclassWithFinalizer)) + self.assertEqual(type_refcnt - 1, sys.getrefcount(HeapCTypeSubclassWithFinalizer)) # Test that subtype_dealloc decref the newly assigned __class__ only once - self.assertEqual(new_type_refcnt, sys.getrefcount(_testcapi.HeapCTypeSubclass)) + self.assertEqual(new_type_refcnt, sys.getrefcount(HeapCTypeSubclass)) def test_heaptype_with_setattro(self): obj = _testcapi.HeapCTypeSetattr() diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 66862ec17cca98..9c65e81dfe4be1 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1639,6 +1639,80 @@ def test_escaping_call_next_to_cmacro(self): """ self.run_cases_test(input, output) + def test_pop_dead_inputs_all_live(self): + input = """ + inst(OP, (a, b --)) { + POP_DEAD_INPUTS(); + HAM(a, b); + 
INPUTS_DEAD(); + } + """ + output = """ + TARGET(OP) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP); + _PyStackRef a; + _PyStackRef b; + b = stack_pointer[-1]; + a = stack_pointer[-2]; + HAM(a, b); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + + def test_pop_dead_inputs_some_live(self): + input = """ + inst(OP, (a, b, c --)) { + POP_DEAD_INPUTS(); + HAM(a); + INPUTS_DEAD(); + } + """ + output = """ + TARGET(OP) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP); + _PyStackRef a; + a = stack_pointer[-3]; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + HAM(a); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + + def test_pop_dead_inputs_with_output(self): + input = """ + inst(OP, (a, b -- c)) { + POP_DEAD_INPUTS(); + c = SPAM(); + } + """ + output = """ + TARGET(OP) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP); + _PyStackRef c; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + c = SPAM(); + stack_pointer[0] = c; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + class TestGeneratedAbstractCases(unittest.TestCase): def setUp(self) -> None: diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 50b5f365165921..0a7557adc4763b 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -892,7 +892,7 @@ def write(items): opname = "LOAD_ATTR_METHOD_WITH_VALUES" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_load_attr_module(self): def get_items(): items = [] diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f0eb5405faeff5..772b46d17ec198 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2070,7 +2070,7 @@ dummy_func( }; specializing 
op(_SPECIALIZE_LOAD_ATTR, (counter/1, owner -- owner)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -2079,7 +2079,7 @@ dummy_func( } OPCODE_DEFERRED_INC(LOAD_ATTR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_LOAD_ATTR, (owner -- attr, self_or_null if (oparg & 1))) { @@ -2158,33 +2158,43 @@ dummy_func( _LOAD_ATTR_INSTANCE_VALUE + unused/5; // Skip over rest of cache - op(_CHECK_ATTR_MODULE, (dict_version/2, owner -- owner)) { + op(_CHECK_ATTR_MODULE_PUSH_KEYS, (dict_version/2, owner -- owner, mod_keys: PyDictKeysObject *)) { PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); DEOPT_IF(Py_TYPE(owner_o)->tp_getattro != PyModule_Type.tp_getattro); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; assert(dict != NULL); - DEOPT_IF(dict->ma_keys->dk_version != dict_version); - } - - op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { - PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; - assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); - assert(index < dict->ma_keys->dk_nentries); - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; - PyObject *attr_o = ep->me_value; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != dict_version); + mod_keys = keys; + } + + op(_LOAD_ATTR_MODULE_FROM_KEYS, (index/1, owner, mod_keys: PyDictKeysObject * -- attr, null if (oparg & 1))) { + assert(mod_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index; + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + 
DEAD(mod_keys); + // Clear mod_keys from stack in case we need to deopt + POP_DEAD_INPUTS(); DEOPT_IF(attr_o == NULL); - STAT_INC(LOAD_ATTR, hit); + #ifdef Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); + if (!increfed) { + DEOPT_IF(true); + } + #else Py_INCREF(attr_o); attr = PyStackRef_FromPyObjectSteal(attr_o); + #endif + STAT_INC(LOAD_ATTR, hit); null = PyStackRef_NULL; - DECREF_INPUTS(); + PyStackRef_CLOSE(owner); } macro(LOAD_ATTR_MODULE) = unused/1 + - _CHECK_ATTR_MODULE + - _LOAD_ATTR_MODULE + + _CHECK_ATTR_MODULE_PUSH_KEYS + + _LOAD_ATTR_MODULE_FROM_KEYS + unused/5; op(_CHECK_ATTR_WITH_HINT, (owner -- owner)) { @@ -4963,6 +4973,21 @@ dummy_func( null = PyStackRef_NULL; } + tier2 op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { + PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; + assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < dict->ma_keys->dk_nentries); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; + PyObject *attr_o = ep->me_value; + DEOPT_IF(attr_o == NULL); + STAT_INC(LOAD_ATTR, hit); + Py_INCREF(attr_o); + attr = PyStackRef_FromPyObjectSteal(attr_o); + null = PyStackRef_NULL; + DECREF_INPUTS(); + } + /* Internal -- for testing executors */ op(_INTERNAL_INCREMENT_OPT_COUNTER, (opt --)) { _PyCounterOptimizerObject *exe = (_PyCounterOptimizerObject *)PyStackRef_AsPyObjectBorrow(opt); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 19ba67a8af6769..55e9c3aa2db64d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2641,8 +2641,9 @@ /* _LOAD_ATTR_INSTANCE_VALUE is split on (oparg & 1) */ - case _CHECK_ATTR_MODULE: { + case _CHECK_ATTR_MODULE_PUSH_KEYS: { _PyStackRef owner; + PyDictKeysObject *mod_keys; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)CURRENT_OPERAND0(); PyObject *owner_o = 
PyStackRef_AsPyObjectBorrow(owner); @@ -2652,33 +2653,51 @@ } PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; assert(dict != NULL); - if (dict->ma_keys->dk_version != dict_version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != dict_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + mod_keys = keys; + stack_pointer[0].bits = (uintptr_t)mod_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_ATTR_MODULE: { + case _LOAD_ATTR_MODULE_FROM_KEYS: { + PyDictKeysObject *mod_keys; _PyStackRef owner; _PyStackRef attr; _PyStackRef null = PyStackRef_NULL; oparg = CURRENT_OPARG(); - owner = stack_pointer[-1]; + mod_keys = (PyDictKeysObject *)stack_pointer[-1].bits; + owner = stack_pointer[-2]; uint16_t index = (uint16_t)CURRENT_OPERAND0(); - PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; - assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); - assert(index < dict->ma_keys->dk_nentries); - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; - PyObject *attr_o = ep->me_value; + assert(mod_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index; + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + // Clear mod_keys from stack in case we need to deopt + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); if (attr_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - STAT_INC(LOAD_ATTR, hit); + #ifdef Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); + if (!increfed) { + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } + #else Py_INCREF(attr_o); attr = PyStackRef_FromPyObjectSteal(attr_o); + #endif + 
STAT_INC(LOAD_ATTR, hit); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; @@ -5928,6 +5947,35 @@ break; } + case _LOAD_ATTR_MODULE: { + _PyStackRef owner; + _PyStackRef attr; + _PyStackRef null = PyStackRef_NULL; + oparg = CURRENT_OPARG(); + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)CURRENT_OPERAND0(); + PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; + assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < dict->ma_keys->dk_nentries); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; + PyObject *attr_o = ep->me_value; + if (attr_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(LOAD_ATTR, hit); + Py_INCREF(attr_o); + attr = PyStackRef_FromPyObjectSteal(attr_o); + null = PyStackRef_NULL; + PyStackRef_CLOSE(owner); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _INTERNAL_INCREMENT_OPT_COUNTER: { _PyStackRef opt; opt = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 51227c9868b8cc..94343f953221eb 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5200,7 +5200,7 @@ owner = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -5211,7 +5211,7 @@ } OPCODE_DEFERRED_INC(LOAD_ATTR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 8 cache entries */ // _LOAD_ATTR @@ -5553,10 +5553,11 @@ INSTRUCTION_STATS(LOAD_ATTR_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef 
owner; + PyDictKeysObject *mod_keys; _PyStackRef attr; _PyStackRef null = PyStackRef_NULL; /* Skip 1 cache entry */ - // _CHECK_ATTR_MODULE + // _CHECK_ATTR_MODULE_PUSH_KEYS { owner = stack_pointer[-1]; uint32_t dict_version = read_u32(&this_instr[2].cache); @@ -5564,21 +5565,29 @@ DEOPT_IF(Py_TYPE(owner_o)->tp_getattro != PyModule_Type.tp_getattro, LOAD_ATTR); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; assert(dict != NULL); - DEOPT_IF(dict->ma_keys->dk_version != dict_version, LOAD_ATTR); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != dict_version, LOAD_ATTR); + mod_keys = keys; } - // _LOAD_ATTR_MODULE + // _LOAD_ATTR_MODULE_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); - PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; - assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); - assert(index < dict->ma_keys->dk_nentries); - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; - PyObject *attr_o = ep->me_value; + assert(mod_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index; + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + // Clear mod_keys from stack in case we need to deopt DEOPT_IF(attr_o == NULL, LOAD_ATTR); - STAT_INC(LOAD_ATTR, hit); + #ifdef Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); + if (!increfed) { + DEOPT_IF(true, LOAD_ATTR); + } + #else Py_INCREF(attr_o); attr = PyStackRef_FromPyObjectSteal(attr_o); + #endif + STAT_INC(LOAD_ATTR, hit); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a4a0472b64e57c..0ef15c630e91db 100644 --- a/Python/optimizer_analysis.c +++ 
b/Python/optimizer_analysis.c @@ -95,7 +95,7 @@ type_watcher_callback(PyTypeObject* type) static PyObject * convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj) { - assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS || inst->opcode == _LOAD_ATTR_MODULE); + assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS || inst->opcode == _LOAD_ATTR_MODULE_FROM_KEYS); assert(PyDict_CheckExact(obj)); PyDictObject *dict = (PyDictObject *)obj; assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 42bdbd9ca8d0cd..0b8aff02367e31 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -492,8 +492,9 @@ dummy_func(void) { (void)owner; } - op(_CHECK_ATTR_MODULE, (dict_version/2, owner -- owner)) { + op(_CHECK_ATTR_MODULE_PUSH_KEYS, (dict_version/2, owner -- owner, mod_keys)) { (void)dict_version; + mod_keys = sym_new_not_null(ctx); if (sym_is_const(owner)) { PyObject *cnst = sym_get_const(owner); if (PyModule_CheckExact(cnst)) { @@ -515,12 +516,12 @@ dummy_func(void) { self_or_null = sym_new_unknown(ctx); } - op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { + op(_LOAD_ATTR_MODULE_FROM_KEYS, (index/1, owner, mod_keys -- attr, null if (oparg & 1))) { (void)index; null = sym_new_null(ctx); attr = NULL; if (this_instr[-1].opcode == _NOP) { - // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. + // Preceding _CHECK_ATTR_MODULE_PUSH_KEYS was removed: mod is const and dict is watched. assert(sym_is_const(owner)); PyModuleObject *mod = (PyModuleObject *)sym_get_const(owner); assert(PyModule_CheckExact(mod)); @@ -530,6 +531,9 @@ dummy_func(void) { this_instr[-1].opcode = _POP_TOP; attr = sym_new_const(ctx, res); } + else { + this_instr->opcode = _LOAD_ATTR_MODULE; + } } if (attr == NULL) { /* No conversion made. We don't know what `attr` is. 
*/ diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index f77a5aa35bdf82..f4fbe8c8aa0480 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1134,61 +1134,74 @@ break; } - case _CHECK_ATTR_MODULE: { + case _CHECK_ATTR_MODULE_PUSH_KEYS: { _Py_UopsSymbol *owner; + _Py_UopsSymbol *mod_keys; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)this_instr->operand0; (void)dict_version; + mod_keys = sym_new_not_null(ctx); if (sym_is_const(owner)) { PyObject *cnst = sym_get_const(owner); if (PyModule_CheckExact(cnst)) { PyModuleObject *mod = (PyModuleObject *)cnst; PyObject *dict = mod->md_dict; + stack_pointer[0] = mod_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); uint64_t watched_mutations = get_mutations(dict); if (watched_mutations < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); this_instr->opcode = _NOP; } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); } } + stack_pointer[0] = mod_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_ATTR_MODULE: { + case _LOAD_ATTR_MODULE_FROM_KEYS: { _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *null = NULL; - owner = stack_pointer[-1]; + owner = stack_pointer[-2]; uint16_t index = (uint16_t)this_instr->operand0; (void)index; null = sym_new_null(ctx); attr = NULL; if (this_instr[-1].opcode == _NOP) { - // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. + // Preceding _CHECK_ATTR_MODULE_PUSH_KEYS was removed: mod is const and dict is watched. 
assert(sym_is_const(owner)); PyModuleObject *mod = (PyModuleObject *)sym_get_const(owner); assert(PyModule_CheckExact(mod)); PyObject *dict = mod->md_dict; - stack_pointer[-1] = attr; - if (oparg & 1) stack_pointer[0] = null; - stack_pointer += (oparg & 1); + stack_pointer[-2] = attr; + if (oparg & 1) stack_pointer[-1] = null; + stack_pointer += -1 + (oparg & 1); assert(WITHIN_STACK_BOUNDS()); PyObject *res = convert_global_to_const(this_instr, dict); if (res != NULL) { this_instr[-1].opcode = _POP_TOP; attr = sym_new_const(ctx, res); } - stack_pointer += -(oparg & 1); + else { + this_instr->opcode = _LOAD_ATTR_MODULE; + } + stack_pointer += 1 - (oparg & 1); assert(WITHIN_STACK_BOUNDS()); } if (attr == NULL) { /* No conversion made. We don't know what `attr` is. */ attr = sym_new_not_null(ctx); } - stack_pointer[-1] = attr; - if (oparg & 1) stack_pointer[0] = null; - stack_pointer += (oparg & 1); + stack_pointer[-2] = attr; + if (oparg & 1) stack_pointer[-1] = null; + stack_pointer += -1 + (oparg & 1); assert(WITHIN_STACK_BOUNDS()); break; } @@ -2528,6 +2541,18 @@ break; } + case _LOAD_ATTR_MODULE: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *null = NULL; + attr = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _INTERNAL_INCREMENT_OPT_COUNTER: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/specialize.c b/Python/specialize.c index fd182e7d7a9215..6eb298217ec2d3 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -738,22 +738,16 @@ unspecialize(_Py_CODEUNIT *instr) } static int function_kind(PyCodeObject *code); +#ifndef Py_GIL_DISABLED static bool function_check_args(PyObject *o, int expected_argcount, int opcode); static uint32_t function_get_version(PyObject *o, int opcode); +#endif static uint32_t type_get_version(PyTypeObject *t, int opcode); static int 
-specialize_module_load_attr( - PyObject *owner, _Py_CODEUNIT *instr, PyObject *name -) { +specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, PyObject *name) +{ _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); - PyModuleObject *m = (PyModuleObject *)owner; - assert((Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); - PyDictObject *dict = (PyDictObject *)m->md_dict; - if (dict == NULL) { - SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NO_DICT); - return -1; - } if (dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_NON_STRING); return -1; @@ -773,19 +767,35 @@ specialize_module_load_attr( SPEC_FAIL_OUT_OF_RANGE); return -1; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState( - _PyInterpreterState_GET(), dict->ma_keys); + uint32_t keys_version = _PyDict_GetKeysVersionForCurrentState( + _PyInterpreterState_GET(), dict); if (keys_version == 0) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS); return -1; } write_u32(cache->version, keys_version); cache->index = (uint16_t)index; - instr->op.code = LOAD_ATTR_MODULE; + specialize(instr, LOAD_ATTR_MODULE); return 0; } - +static int +specialize_module_load_attr( + PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) +{ + PyModuleObject *m = (PyModuleObject *)owner; + assert((Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); + PyDictObject *dict = (PyDictObject *)m->md_dict; + if (dict == NULL) { + SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NO_DICT); + return -1; + } + int result; + Py_BEGIN_CRITICAL_SECTION(dict); + result = specialize_module_load_attr_lock_held(dict, instr, name); + Py_END_CRITICAL_SECTION(); + return result; +} /* Attribute specialization */ @@ -968,7 +978,7 @@ specialize_dict_access( } write_u32(cache->version, type->tp_version_tag); cache->index = (uint16_t)offset; - instr->op.code = values_op; + specialize(instr, values_op); } else { PyDictObject *dict = _PyObject_GetManagedDict(owner); @@ 
-992,11 +1002,12 @@ specialize_dict_access( } cache->index = (uint16_t)index; write_u32(cache->version, type->tp_version_tag); - instr->op.code = hint_op; + specialize(instr, hint_op); } return 1; } +#ifndef Py_GIL_DISABLED static int specialize_attr_loadclassattr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* name, PyObject* descr, DescriptorClassification kind, bool is_method); static int specialize_class_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* name); @@ -1093,7 +1104,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na write_u32(lm_cache->type_version, type->tp_version_tag); /* borrowed */ write_obj(lm_cache->descr, fget); - instr->op.code = LOAD_ATTR_PROPERTY; + specialize(instr, LOAD_ATTR_PROPERTY); return 0; } case OBJECT_SLOT: @@ -1117,7 +1128,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na assert(offset > 0); cache->index = (uint16_t)offset; write_u32(cache->version, type->tp_version_tag); - instr->op.code = LOAD_ATTR_SLOT; + specialize(instr, LOAD_ATTR_SLOT); return 0; } case DUNDER_CLASS: @@ -1126,7 +1137,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na assert(offset == (uint16_t)offset); cache->index = (uint16_t)offset; write_u32(cache->version, type->tp_version_tag); - instr->op.code = LOAD_ATTR_SLOT; + specialize(instr, LOAD_ATTR_SLOT); return 0; } case OTHER_SLOT: @@ -1162,7 +1173,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na /* borrowed */ write_obj(lm_cache->descr, descr); write_u32(lm_cache->type_version, type->tp_version_tag); - instr->op.code = LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN; + specialize(instr, LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN); return 0; } case BUILTIN_CLASSMETHOD: @@ -1186,6 +1197,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na if (shadow) { goto try_instance; } + set_counter((_Py_BackoffCounter*)instr + 1, adaptive_counter_cooldown()); 
return 0; } Py_UNREACHABLE(); @@ -1197,14 +1209,14 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na } return -1; } +#endif // Py_GIL_DISABLED void _Py_Specialize_LoadAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *name) { - _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyObject *owner = PyStackRef_AsPyObjectBorrow(owner_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[LOAD_ATTR] == INLINE_CACHE_ENTRIES_LOAD_ATTR); PyTypeObject *type = Py_TYPE(owner); bool fail; @@ -1219,22 +1231,24 @@ _Py_Specialize_LoadAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *nam fail = specialize_module_load_attr(owner, instr, name); } else if (PyType_Check(owner)) { + #ifdef Py_GIL_DISABLED + SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_EXPECTED_ERROR); + fail = true; + #else fail = specialize_class_load_attr(owner, instr, name); + #endif } else { + #ifdef Py_GIL_DISABLED + SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_EXPECTED_ERROR); + fail = true; + #else fail = specialize_instance_load_attr(owner, instr, name); + #endif } if (fail) { - STAT_INC(LOAD_ATTR, failure); - assert(!PyErr_Occurred()); - instr->op.code = LOAD_ATTR; - cache->counter = adaptive_counter_backoff(cache->counter); - } - else { - STAT_INC(LOAD_ATTR, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } } @@ -1339,6 +1353,7 @@ _Py_Specialize_StoreAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *na cache->counter = adaptive_counter_cooldown(); } +#ifndef Py_GIL_DISABLED #ifdef Py_STATS static int @@ -1422,10 +1437,10 @@ specialize_class_load_attr(PyObject *owner, _Py_CODEUNIT *instr, write_obj(cache->descr, descr); if (metaclass_check) { write_u32(cache->keys_version, Py_TYPE(cls)->tp_version_tag); - instr->op.code = LOAD_ATTR_CLASS_WITH_METACLASS_CHECK; + specialize(instr, LOAD_ATTR_CLASS_WITH_METACLASS_CHECK); } else { - instr->op.code = 
LOAD_ATTR_CLASS; + specialize(instr, LOAD_ATTR_CLASS); } return 0; #ifdef Py_STATS @@ -1461,7 +1476,7 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 0; } write_u32(cache->keys_version, keys_version); - instr->op.code = is_method ? LOAD_ATTR_METHOD_WITH_VALUES : LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES; + specialize(instr, is_method ? LOAD_ATTR_METHOD_WITH_VALUES : LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES); } else { Py_ssize_t dictoffset; @@ -1476,7 +1491,7 @@ PyObject *descr, DescriptorClassification kind, bool is_method) } } if (dictoffset == 0) { - instr->op.code = is_method ? LOAD_ATTR_METHOD_NO_DICT : LOAD_ATTR_NONDESCRIPTOR_NO_DICT; + specialize(instr, is_method ? LOAD_ATTR_METHOD_NO_DICT : LOAD_ATTR_NONDESCRIPTOR_NO_DICT); } else if (is_method) { PyObject *dict = *(PyObject **) ((char *)owner + dictoffset); @@ -1490,7 +1505,7 @@ PyObject *descr, DescriptorClassification kind, bool is_method) dictoffset -= MANAGED_DICT_OFFSET; assert(((uint16_t)dictoffset) == dictoffset); cache->dict_offset = (uint16_t)dictoffset; - instr->op.code = LOAD_ATTR_METHOD_LAZY_DICT; + specialize(instr, LOAD_ATTR_METHOD_LAZY_DICT); } else { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_CLASS_ATTR_SIMPLE); @@ -1516,6 +1531,9 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 1; } +#endif // Py_GIL_DISABLED + + static void specialize_load_global_lock_held( PyObject *globals, PyObject *builtins, @@ -1661,6 +1679,7 @@ function_kind(PyCodeObject *code) { return SIMPLE_FUNCTION; } +#ifndef Py_GIL_DISABLED /* Returning false indicates a failure. */ static bool function_check_args(PyObject *o, int expected_argcount, int opcode) @@ -1693,6 +1712,7 @@ function_get_version(PyObject *o, int opcode) } return version; } +#endif // Py_GIL_DISABLED /* Returning 0 indicates a failure. 
*/ static uint32_t diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index dad2557e97a948..d17617cab0266b 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -120,6 +120,7 @@ def __init__(self, out: CWriter): "PyStackRef_AsPyObjectSteal": self.stackref_steal, "DISPATCH": self.dispatch, "INSTRUCTION_SIZE": self.instruction_size, + "POP_DEAD_INPUTS": self.pop_dead_inputs, } self.out = out @@ -348,6 +349,20 @@ def save_stack( self.emit_save(storage) return True + def pop_dead_inputs( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: Uop, + storage: Storage, + inst: Instruction | None, + ) -> bool: + next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + storage.pop_dead_inputs(self.out) + return True + def emit_reload(self, storage: Storage) -> None: storage.reload(self.out) self._print_storage(storage) diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 286f47d0cfb11b..9471fe0e56f7d8 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -512,6 +512,10 @@ def flush(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = self._push_defined_outputs() self.stack.flush(out, cast_type, extract_bits) + def pop_dead_inputs(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = True) -> None: + self.clear_dead_inputs() + self.stack.flush(out, cast_type, extract_bits) + def save(self, out: CWriter) -> None: assert self.spilled >= 0 if self.spilled == 0: