From 2ec65dc331054cf68f7a3b740c19db7f64533ff5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Oct 2024 11:37:19 +0100 Subject: [PATCH 01/20] Initial implementation of tagged stackrefs for GIL build. Requires fixes to immortal objects. --- Include/internal/pycore_stackref.h | 97 +++++++++++++++++++++++++----- Include/refcount.h | 93 +++++++++++++++++++++++++--- Python/bytecodes.c | 10 +-- Python/ceval.c | 4 -- Python/ceval_macros.h | 11 ---- Python/executor_cases.c.h | 10 +-- Python/generated_cases.c.h | 10 +-- Python/sysmodule.c | 3 + 8 files changed, 188 insertions(+), 50 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 7d1eb11aa5ecb8..1805eee642c8f6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -54,12 +54,13 @@ typedef union _PyStackRef { } _PyStackRef; +#ifdef Py_GIL_DISABLED + #define Py_TAG_DEFERRED (1) #define Py_TAG_PTR ((uintptr_t)0) #define Py_TAG_BITS ((uintptr_t)1) -#ifdef Py_GIL_DISABLED static const _PyStackRef PyStackRef_NULL = { .bits = Py_TAG_DEFERRED}; #define PyStackRef_IsNull(stackref) ((stackref).bits == PyStackRef_NULL.bits) @@ -153,36 +154,104 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) return PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(stackref)); } - #else // Py_GIL_DISABLED // With GIL + +#define Py_TAG_BITS 1 +#define Py_TAG_REFCNT 1 +#define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) +#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) + static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; -#define PyStackRef_IsNull(stackref) ((stackref).bits == 0) -#define PyStackRef_True ((_PyStackRef){.bits = (uintptr_t)&_Py_TrueStruct }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) }) -#define PyStackRef_AsPyObjectBorrow(stackref) ((PyObject *)(stackref).bits) +#define 
PyStackRef_IsNull(ref) ((ref).bits == 0) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) -#define PyStackRef_AsPyObjectSteal(stackref) PyStackRef_AsPyObjectBorrow(stackref) +static inline int +PyStackRef_HasCount(_PyStackRef ref) +{ + return ref.bits & Py_TAG_REFCNT; +} -#define PyStackRef_FromPyObjectSteal(obj) ((_PyStackRef){.bits = ((uintptr_t)(obj))}) +static inline PyObject * +PyStackRef_AsPyObjectBorrow(_PyStackRef ref) +{ + return BITS_TO_PTR_MASKED(ref); +} -#define PyStackRef_FromPyObjectNew(obj) ((_PyStackRef){ .bits = (uintptr_t)(Py_NewRef(obj)) }) +static inline PyObject * +PyStackRef_AsPyObjectSteal(_PyStackRef ref) +{ + if (PyStackRef_HasCount(ref)) { + return Py_NewRef(BITS_TO_PTR_MASKED(ref)); + } + else { + return BITS_TO_PTR(ref); + } +} -#define PyStackRef_FromPyObjectImmortal(obj) ((_PyStackRef){ .bits = (uintptr_t)(obj) }) +/* We will want to extend this to a larger set of objects in the future */ +#define _Py_IsDeferrable _Py_IsImmortal -#define PyStackRef_CLOSE(stackref) Py_DECREF(PyStackRef_AsPyObjectBorrow(stackref)) +static inline _PyStackRef +PyStackRef_FromPyObjectSteal(PyObject *obj) +{ + assert(obj != NULL); + unsigned int tag = _Py_IsDeferrable(obj) ? 
Py_TAG_REFCNT : 0; + _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); + return ref; +} + +static inline _PyStackRef +_PyStackRef_FromPyObjectNew(PyObject *obj) +{ + if (_Py_IsDeferrable(obj)) { + return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; + } + Py_INCREF(obj); + _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; + return ref; +} +#define PyStackRef_FromPyObjectNew(obj) _PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) + +/* Create a new reference from an object with an embedded reference count */ +static inline _PyStackRef +_PyStackRef_FromPyObjectWithCount(PyObject *obj) +{ + return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; +} +#define PyStackRef_FromPyObjectWithCount(obj) _PyStackRef_FromPyObjectWithCount(_PyObject_CAST(obj)) -#define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref))) +#define PyStackRef_FromPyObjectImmortal PyStackRef_FromPyObjectWithCount +static inline _PyStackRef +PyStackRef_DUP(_PyStackRef ref) +{ + assert(!PyStackRef_IsNull(ref)); + if (!PyStackRef_HasCount(ref)) { + Py_INCREF_MORTAL(BITS_TO_PTR(ref)); + } + return ref; +} + +static inline void +PyStackRef_CLOSE(_PyStackRef ref) +{ + assert(!PyStackRef_IsNull(ref)); + if (!PyStackRef_HasCount(ref)) { + Py_DECREF_MORTAL(BITS_TO_PTR(ref)); + } +} + #endif // Py_GIL_DISABLED // Note: this is a macro because MSVC (Windows) has trouble inlining it. -#define PyStackRef_Is(a, b) ((a).bits == (b).bits) +#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) // Converts a PyStackRef back to a PyObject *, converting the // stackref to a new reference. diff --git a/Include/refcount.h b/Include/refcount.h index 9a4e15065ecab8..82d247da18ffed 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -34,7 +34,7 @@ having all the lower 32 bits set, which will avoid the reference count to go beyond the refcount limit. 
Immortality checks for reference count decreases will be done by checking the bit sign flag in the lower 32 bits. */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) +#define _Py_IMMORTAL_REFCNT ((Py_ssize_t)0xb0000000) #else /* @@ -274,6 +274,32 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) # define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op)) #endif +static inline Py_ALWAYS_INLINE void Py_INCREF_MORTAL(PyObject *op) +{ +#if defined(Py_GIL_DISABLED) + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + uint32_t new_local = local + 1; + assert (new_local != 0); + if (_Py_IsOwnedByCurrentThread(op)) { + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local); + } + else { + _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); + } +#elif SIZEOF_VOID_P > 4 + PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; + PY_UINT32_T new_refcnt = cur_refcnt + 1; + op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt; +#else + op->ob_refcnt++; +#endif + _Py_INCREF_STAT_INC(); +#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) + if (!_Py_IsImmortal(op)) { + _Py_INCREF_IncRefTotal(); + } +#endif +} #if !defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED) // Implements Py_DECREF on objects not owned by the current thread. 
@@ -302,6 +328,27 @@ static inline void Py_DECREF(PyObject *op) { #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) #elif defined(Py_GIL_DISABLED) && defined(Py_REF_DEBUG) +static inline void Py_DECREF_MORTAL(const char *filename, int lineno, PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (_Py_IsOwnedByCurrentThread(op)) { + if (local == 0) { + _Py_NegativeRefcount(filename, lineno, op); + } + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefSharedDebug(op, filename, lineno); + } +} +#define Py_DECREF_MORTAL(op) Py_DECREF_MORTAL(__FILE__, __LINE__, _PyObject_CAST(op)) + static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) { uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); @@ -328,6 +375,22 @@ static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) #define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) #elif defined(Py_GIL_DISABLED) +static inline void Py_DECREF_MORTAL(PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + _Py_DECREF_STAT_INC(); + if (_Py_IsOwnedByCurrentThread(op)) { + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefShared(op); + } +} + static inline void Py_DECREF(PyObject *op) { uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); @@ -350,6 +413,15 @@ static inline void Py_DECREF(PyObject *op) #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) #elif defined(Py_REF_DEBUG) +static inline Py_ALWAYS_INLINE void Py_DECREF_MORTAL(PyObject *op) +{ + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (--op->ob_refcnt == 0) { + _Py_Dealloc(op); + } +} + static inline void Py_DECREF(const char *filename, int lineno, 
PyObject *op) { if (op->ob_refcnt <= 0) { @@ -359,15 +431,21 @@ static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) _Py_DECREF_IMMORTAL_STAT_INC(); return; } - _Py_DECREF_STAT_INC(); - _Py_DECREF_DecRefTotal(); + Py_DECREF_MORTAL(op); +} +#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) + +#else +static inline Py_ALWAYS_INLINE void Py_DECREF_MORTAL(PyObject *op) +{ + if (!_Py_IsImmortal(op)) { + _Py_DECREF_STAT_INC(); + } if (--op->ob_refcnt == 0) { _Py_Dealloc(op); } } -#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) -#else static inline Py_ALWAYS_INLINE void Py_DECREF(PyObject *op) { // Non-limited C API and limited C API for Python 3.9 and older access @@ -376,10 +454,7 @@ static inline Py_ALWAYS_INLINE void Py_DECREF(PyObject *op) _Py_DECREF_IMMORTAL_STAT_INC(); return; } - _Py_DECREF_STAT_INC(); - if (--op->ob_refcnt == 0) { - _Py_Dealloc(op); - } + Py_DECREF_MORTAL(op); } #define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 87cca3fc1d373c..d58a78961fcb83 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3627,11 +3627,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *str = PyObject_Str(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(str == NULL, error); + res = PyStackRef_FromPyObjectSteal(str); } macro(CALL_STR_1) = @@ -3648,11 +3649,12 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null)); DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *tuple = PySequence_Tuple(arg_o); DEAD(null); DEAD(callable); PyStackRef_CLOSE(arg); - ERROR_IF(PyStackRef_IsNull(res), error); + ERROR_IF(tuple == NULL, error); + res = 
PyStackRef_FromPyObjectSteal(tuple); } macro(CALL_TUPLE_1) = diff --git a/Python/ceval.c b/Python/ceval.c index f4e0add3034707..970f4fe627e3f4 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1765,7 +1765,6 @@ _PyEvalFramePushAndInit_UnTagged(PyThreadState *tstate, _PyStackRef func, PyObject *locals, PyObject *const* args, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous) { -#if defined(Py_GIL_DISABLED) size_t kw_count = kwnames == NULL ? 0 : PyTuple_GET_SIZE(kwnames); size_t total_argcount = argcount + kw_count; _PyStackRef *tagged_args_buffer = PyMem_Malloc(sizeof(_PyStackRef) * total_argcount); @@ -1782,9 +1781,6 @@ _PyEvalFramePushAndInit_UnTagged(PyThreadState *tstate, _PyStackRef func, _PyInterpreterFrame *res = _PyEvalFramePushAndInit(tstate, func, locals, (_PyStackRef const *)tagged_args_buffer, argcount, kwnames, previous); PyMem_Free(tagged_args_buffer); return res; -#else - return _PyEvalFramePushAndInit(tstate, func, locals, (_PyStackRef const *)args, argcount, kwnames, previous); -#endif } /* Same as _PyEvalFramePushAndInit but takes an args tuple and kwargs dict. diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index e0e9cc156ed62f..578e7e569d92f7 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -442,25 +442,14 @@ do { \ /* How much scratch space to give stackref to PyObject* conversion. 
*/ #define MAX_STACKREF_SCRATCH 10 -#ifdef Py_GIL_DISABLED #define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \ /* +1 because vectorcall might use -1 to write self */ \ PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \ PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1); -#else -#define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \ - PyObject **NAME = (PyObject **)ARGS; \ - assert(NAME != NULL); -#endif -#ifdef Py_GIL_DISABLED #define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \ /* +1 because we +1 previously */ \ _PyObjectArray_Free(NAME - 1, NAME##_temp); -#else -#define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \ - (void)(NAME); -#endif #ifdef Py_GIL_DISABLED #define CONVERSION_FAILED(NAME) ((NAME) == NULL) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 57e15f33ca7703..6af50aa6642437 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4297,10 +4297,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *str = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (str == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(str); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); @@ -4329,10 +4330,11 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *tuple = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); + if (tuple == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(tuple); stack_pointer[-3] = res; stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7656ce6bb7e313..4ca0a72f37ea37 100644 --- 
a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2976,10 +2976,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyUnicode_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PyObject_Str(arg_o)); + PyObject *str = PyObject_Str(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (str == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(str); } // _CHECK_PERIODIC { @@ -3026,10 +3027,11 @@ DEOPT_IF(callable_o != (PyObject *)&PyTuple_Type, CALL); STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - res = PyStackRef_FromPyObjectSteal(PySequence_Tuple(arg_o)); + PyObject *tuple = PySequence_Tuple(arg_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(arg); - if (PyStackRef_IsNull(res)) goto pop_3_error; + if (tuple == NULL) goto pop_3_error; + res = PyStackRef_FromPyObjectSteal(tuple); } // _CHECK_PERIODIC { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index ac343a8048e008..224121dcaf1f27 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1965,6 +1965,9 @@ static Py_ssize_t sys_getrefcount_impl(PyObject *module, PyObject *object) /*[clinic end generated code: output=5fd477f2264b85b2 input=bf474efd50a21535]*/ { + if (_Py_IsImmortal(object)) { + return _Py_IMMORTAL_REFCNT; + } return Py_REFCNT(object); } From ee7f3a60cd071f2d003921b0ad8da2e291727bf8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 11 Oct 2024 09:47:05 +0100 Subject: [PATCH 02/20] Streamline PyStackRef_XCLOSE and PyStackRef_CLEAR. 
--- Include/internal/pycore_stackref.h | 58 +++++++++++++++++++----------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 1805eee642c8f6..f03ddb26177292 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -154,6 +154,24 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) return PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(stackref)); } +#define PyStackRef_XCLOSE(stackref) \ + do { \ + _PyStackRef _tmp = (stackref); \ + if (!PyStackRef_IsNull(_tmp)) { \ + PyStackRef_CLOSE(_tmp); \ + } \ + } while (0); + +#define PyStackRef_CLEAR(op) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(op); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + if (!PyStackRef_IsNull(_tmp_old_op)) { \ + *_tmp_op_ptr = PyStackRef_NULL; \ + PyStackRef_CLOSE(_tmp_old_op); \ + } \ + } while (0) + #else // Py_GIL_DISABLED // With GIL @@ -163,9 +181,10 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; +#define PyStackRef_NULL_BITS Py_TAG_REFCNT +static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; -#define PyStackRef_IsNull(ref) ((ref).bits == 0) +#define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) #define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) #define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) #define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) @@ -247,6 +266,23 @@ PyStackRef_CLOSE(_PyStackRef ref) } } +static inline void +PyStackRef_XCLOSE(_PyStackRef ref) +{ + if (!PyStackRef_HasCount(ref)) { + assert(!PyStackRef_IsNull(ref)); + Py_DECREF_MORTAL(BITS_TO_PTR(ref)); + } +} + +#define 
PyStackRef_CLEAR(REF) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(REF); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + *_tmp_op_ptr = PyStackRef_NULL; \ + PyStackRef_XCLOSE(_tmp_old_op); \ + } while (0) + #endif // Py_GIL_DISABLED // Note: this is a macro because MSVC (Windows) has trouble inlining it. @@ -259,24 +295,6 @@ PyStackRef_CLOSE(_PyStackRef ref) #define PyStackRef_TYPE(stackref) Py_TYPE(PyStackRef_AsPyObjectBorrow(stackref)) -#define PyStackRef_CLEAR(op) \ - do { \ - _PyStackRef *_tmp_op_ptr = &(op); \ - _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ - if (!PyStackRef_IsNull(_tmp_old_op)) { \ - *_tmp_op_ptr = PyStackRef_NULL; \ - PyStackRef_CLOSE(_tmp_old_op); \ - } \ - } while (0) - -#define PyStackRef_XCLOSE(stackref) \ - do { \ - _PyStackRef _tmp = (stackref); \ - if (!PyStackRef_IsNull(_tmp)) { \ - PyStackRef_CLOSE(_tmp); \ - } \ - } while (0); - // StackRef type checks From 21d7e87115930763f73fb4ca15867150e3f5d217 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 11 Oct 2024 10:56:18 +0100 Subject: [PATCH 03/20] Fix error handling with not stackref outputs in code generator --- Include/internal/pycore_opcode_metadata.h | 8 ++-- Include/internal/pycore_stackref.h | 1 + Include/internal/pycore_uop_metadata.h | 4 +- Lib/test/test_generated_cases.py | 3 +- Python/bytecodes.c | 13 ++---- Python/ceval.c | 4 ++ Python/executor_cases.c.h | 12 +++-- Python/generated_cases.c.h | 51 +++++++++++++--------- Tools/cases_generator/generators_common.py | 2 +- 9 files changed, 52 insertions(+), 46 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 8fec45b1e8d5c3..2dcc2825bd47ea 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1037,7 +1037,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | 
HAS_ERROR_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, - [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1050,9 +1050,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ISINSTANCE] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, 
[CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1107,7 +1107,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_FROM] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, 0 }, [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index f03ddb26177292..05514ef6e2e6fc 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -269,6 +269,7 @@ PyStackRef_CLOSE(_PyStackRef ref) static inline void PyStackRef_XCLOSE(_PyStackRef ref) { + assert(ref.bits != 0); if (!PyStackRef_HasCount(ref)) { assert(!PyStackRef_IsNull(ref)); Py_DECREF_MORTAL(BITS_TO_PTR(ref)); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fd41e9a5fe862b..fe89d8d860e5d3 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -227,7 +227,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | 
HAS_ESCAPES_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, - [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_CLASS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -241,7 +241,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAYBE_EXPAND_METHOD_KW] = HAS_ARG_FLAG, - [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_FUNCTION_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CHECK_METHOD_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_EXPAND_METHOD_KW] = HAS_ARG_FLAG, diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index cd3718b80612bd..819b77bf999038 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1255,8 +1255,7 @@ def test_push_then_error(self): b = 1; if (cond) { stack_pointer[0] = a; - stack_pointer[1] = b; - stack_pointer += 2; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); goto error; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6ffeaa4da5433d..b3450bdd919fba 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3325,13 +3325,11 @@ dummy_func( tstate, callable[0], locals, args, total_args, NULL, frame ); + ERROR_IF(new_frame == NULL, error); // Manipulate stack directly since we 
leave using DISPATCH_INLINED(). - SYNC_SP(); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - if (new_frame == NULL) { - ERROR_NO_POP(); - } + SYNC_SP(); frame->return_offset = (uint16_t)(next_instr - this_instr); DISPATCH_INLINED(new_frame); } @@ -3695,10 +3693,9 @@ dummy_func( DEAD(self); init_frame = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); - SYNC_SP(); if (init_frame == NULL) { _PyEval_FrameClearAndPop(tstate, shim); - ERROR_NO_POP(); + ERROR_IF(true, error); } frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. @@ -4258,12 +4255,10 @@ dummy_func( args, positional_args, kwnames_o, frame ); PyStackRef_CLOSE(kwnames); + ERROR_IF(new_frame == NULL, error); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. SYNC_SP(); - if (new_frame == NULL) { - ERROR_NO_POP(); - } } op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable[1], self_or_null[1], unused[oparg], kwnames -- callable[1], self_or_null[1], unused[oparg], kwnames)) { diff --git a/Python/ceval.c b/Python/ceval.c index 970f4fe627e3f4..75e7cfe7570cea 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -146,6 +146,10 @@ dump_stack(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer) if (ptr != stack_base) { printf(", "); } + if (PyStackRef_IsNull(*ptr)) { + printf("<NULL>"); + continue; + } PyObject *obj = PyStackRef_AsPyObjectBorrow(*ptr); if (obj == NULL) { printf("<nil>"); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b4c497709c8941..4b9a8a62abddf6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4408,18 +4408,18 @@ init_frame = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (init_frame
== NULL) { _PyEval_FrameClearAndPop(tstate, shim); - JUMP_TO_ERROR(); + if (true) JUMP_TO_ERROR(); } frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; + stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; + stack_pointer += -1 - oparg; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -5096,14 +5096,12 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(kwnames); + if (new_frame == NULL) JUMP_TO_ERROR(); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - JUMP_TO_ERROR(); - } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 127586198ea31d..8ded9d63b153bd 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -925,14 +925,16 @@ args, total_args, NULL, frame ); stack_pointer = _PyFrame_GetStackPointer(frame); - // Manipulate stack directly since we leave using DISPATCH_INLINED(). - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - // The frame has stolen all the arguments from the stack, - // so there is no need to clean them up. if (new_frame == NULL) { + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); goto error; } + // Manipulate stack directly since we leave using DISPATCH_INLINED(). + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. 
+ stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); frame->return_offset = (uint16_t)(next_instr - this_instr); DISPATCH_INLINED(new_frame); } @@ -1072,12 +1074,13 @@ init_frame = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (init_frame == NULL) { _PyEval_FrameClearAndPop(tstate, shim); - goto error; + if (true) { + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } } frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. @@ -1092,7 +1095,7 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); @@ -2092,14 +2095,16 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(kwnames); + if (new_frame == NULL) { + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } } // _SAVE_RETURN_OFFSET { @@ -2270,14 +2275,16 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(kwnames); + if (new_frame == NULL) { + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. 
stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } } // _SAVE_RETURN_OFFSET { @@ -4438,14 +4445,16 @@ args, total_args, NULL, frame ); stack_pointer = _PyFrame_GetStackPointer(frame); - // Manipulate stack directly since we leave using DISPATCH_INLINED(). - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - // The frame has stolen all the arguments from the stack, - // so there is no need to clean them up. if (new_frame == NULL) { + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); goto error; } + // Manipulate stack directly since we leave using DISPATCH_INLINED(). + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); frame->return_offset = (uint16_t)(next_instr - this_instr); DISPATCH_INLINED(new_frame); } diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 0bfa1a3b56fbc2..955c88fd052e6d 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -190,7 +190,7 @@ def error_if( self.out.emit(";\n") else: self.out.emit("{\n") - storage.copy().flush(self.out) + storage.copy().stack.flush(self.out) self.out.emit("goto ") self.out.emit(label) self.out.emit(";\n") From 2636c049d78030d07fe41fbe72b12dc58b230052 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 14 Oct 2024 14:20:13 +0100 Subject: [PATCH 04/20] Attempt to fix _BINARY_OP_INPLACE_ADD_UNICODE and use tag references in LOAD_CONST --- Include/internal/pycore_code.h | 2 +- Include/internal/pycore_opcode_metadata.h | 4 ++-- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 17 +++++++-------- Python/executor_cases.c.h | 17 +++++++-------- Python/generated_cases.c.h | 25 ++++++++++++++--------- 6 files changed, 35 insertions(+), 32 deletions(-) diff 
--git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 57e0a14bb9b5bd..01fb9ea67307c4 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -317,7 +317,7 @@ extern void _PyCode_Clear_Executors(PyCodeObject *code); // gh-115999 tracks progress on addressing this. #define ENABLE_SPECIALIZATION 0 #else -#define ENABLE_SPECIALIZATION 1 +#define ENABLE_SPECIALIZATION 0 #endif /* Specialization functions */ diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 2dcc2825bd47ea..de413be7b34d92 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1149,7 +1149,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG }, + [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1187,7 +1187,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [RESERVED] = { true, INSTR_FMT_IX, 0 }, [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, - [RETURN_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, + [RETURN_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG }, 
[RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [RETURN_VALUE] = { true, INSTR_FMT_IX, 0 }, [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fe89d8d860e5d3..e74bb2f0bbffdb 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -35,7 +35,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_STORE_FAST_0] = HAS_LOCAL_FLAG, [_STORE_FAST_1] = HAS_LOCAL_FLAG, [_STORE_FAST_2] = HAS_LOCAL_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7a3cea112299cd..9ae637825c3834 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -256,7 +256,7 @@ dummy_func( } pure inst(LOAD_CONST, (-- value)) { - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); } replicate(8) inst(STORE_FAST, (value --)) { @@ -611,9 +611,6 @@ dummy_func( // specializations, but there is no output. // At the end we just skip over the STORE_FAST. op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); - int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -635,12 +632,12 @@ dummy_func( * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); - PyStackRef_CLOSE(left); - DEAD(left); - PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); - PyUnicode_Append(&temp, right_o); - *target_local = PyStackRef_FromPyObjectSteal(temp); + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + PyStackRef_CLEAR(*target_local); + assert(Py_REFCNT(left_o) >= 1); + PyUnicode_Append(&left_o, right_o); + *target_local = PyStackRef_FromPyObjectSteal(left_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); DEAD(right); ERROR_IF(PyStackRef_IsNull(*target_local), error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3d84a93e906730..02e7514045d182 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -203,7 +203,9 @@ case _LOAD_CONST: { _PyStackRef value; oparg = CURRENT_OPARG(); - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -737,8 +739,6 @@ _PyStackRef left; right = stack_pointer[-1]; left = stack_pointer[-2]; - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -763,11 +763,12 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); - PyStackRef_CLOSE(left); - PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); - PyUnicode_Append(&temp, right_o); - *target_local = PyStackRef_FromPyObjectSteal(temp); + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + PyStackRef_CLEAR(*target_local); + assert(Py_REFCNT(left_o) >= 1); + PyUnicode_Append(&left_o, right_o); + *target_local = PyStackRef_FromPyObjectSteal(left_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) JUMP_TO_ERROR(); #if TIER_ONE diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ca1134e928f11c..f0b591c85ba3b5 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -183,8 +183,6 @@ /* Skip 1 cache entry */ // _BINARY_OP_INPLACE_ADD_UNICODE { - PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); - PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); int next_oparg; #if TIER_ONE assert(next_instr->op.code == STORE_FAST); @@ -206,11 +204,12 @@ * only the locals reference, so PyUnicode_Append knows * that the string is safe to mutate. 
*/ - assert(Py_REFCNT(left_o) >= 2); - PyStackRef_CLOSE(left); - PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); - PyUnicode_Append(&temp, right_o); - *target_local = PyStackRef_FromPyObjectSteal(temp); + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + PyStackRef_CLEAR(*target_local); + assert(Py_REFCNT(left_o) >= 1); + PyUnicode_Append(&left_o, right_o); + *target_local = PyStackRef_FromPyObjectSteal(left_o); PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) goto pop_2_error; #if TIER_ONE @@ -4899,7 +4898,9 @@ _PyStackRef res; // _LOAD_CONST { - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE_EVENT { @@ -5909,7 +5910,9 @@ next_instr += 1; INSTRUCTION_STATS(LOAD_CONST); _PyStackRef value; - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -6967,7 +6970,9 @@ _PyStackRef res; // _LOAD_CONST { - value = PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE { From a79aa478d91ba212e6f2c7cc141a5489a13b7f13 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 14 Oct 2024 16:25:05 +0100 Subject: [PATCH 05/20] Show frame locals in lltrace --- Include/internal/pycore_code.h | 2 +- Python/bytecodes.c | 2 +- Python/ceval.c | 61 
+++++++++++++++++++++------------- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 6 ++-- 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 01fb9ea67307c4..57e0a14bb9b5bd 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -317,7 +317,7 @@ extern void _PyCode_Clear_Executors(PyCodeObject *code); // gh-115999 tracks progress on addressing this. #define ENABLE_SPECIALIZATION 0 #else -#define ENABLE_SPECIALIZATION 0 +#define ENABLE_SPECIALIZATION 1 #endif /* Specialization functions */ diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 444aab823a5a55..8952551927f7be 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -256,7 +256,7 @@ dummy_func( } pure inst(LOAD_CONST, (-- value)) { - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); } replicate(8) inst(STORE_FAST, (value --)) { diff --git a/Python/ceval.c b/Python/ceval.c index 75e7cfe7570cea..ac70379d822d54 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -136,39 +136,54 @@ #ifdef LLTRACE +static void +dump_item(_PyStackRef item) +{ + if (PyStackRef_IsNull(item)) { + printf("<NULL>"); + return; + } + PyObject *obj = PyStackRef_AsPyObjectBorrow(item); + if (obj == NULL) { + printf("<nil>"); + return; + } + if ( + obj == Py_None + || PyBool_Check(obj) + || PyLong_CheckExact(obj) + || PyFloat_CheckExact(obj) + || PyUnicode_CheckExact(obj) + ) { + if (PyObject_Print(obj, stdout, 0) == 0) { + return; + } + PyErr_Clear(); + } + // Don't call __repr__(), it might recurse into the interpreter.
+ printf("<%s at %p>", Py_TYPE(obj)->tp_name, (void *)(item.bits)); +} + static void dump_stack(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer) { + _PyStackRef *locals_base = _PyFrame_GetLocalsArray(frame); _PyStackRef *stack_base = _PyFrame_Stackbase(frame); PyObject *exc = PyErr_GetRaisedException(); + printf(" locals=["); + for (_PyStackRef *ptr = locals_base; ptr < stack_base; ptr++) { + if (ptr != locals_base) { + printf(", "); + } + dump_item(*ptr); + } + printf("]\n"); printf(" stack=["); for (_PyStackRef *ptr = stack_base; ptr < stack_pointer; ptr++) { if (ptr != stack_base) { printf(", "); } - if (PyStackRef_IsNull(*ptr)) { - printf("<NULL>"); - continue; - } - PyObject *obj = PyStackRef_AsPyObjectBorrow(*ptr); - if (obj == NULL) { - printf("<nil>"); - continue; - } - if ( - obj == Py_None - || PyBool_Check(obj) - || PyLong_CheckExact(obj) - || PyFloat_CheckExact(obj) - || PyUnicode_CheckExact(obj) - ) { - if (PyObject_Print(obj, stdout, 0) == 0) { - continue; - } - PyErr_Clear(); - } - // Don't call __repr__(), it might recurse into the interpreter.
- printf("<%s at %p>", Py_TYPE(obj)->tp_name, (void *)(ptr->bits)); + dump_item(*ptr); } printf("]\n"); fflush(stdout); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9d0b34c1f12c14..48386d2788150e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -204,7 +204,7 @@ _PyStackRef value; oparg = CURRENT_OPARG(); _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6efc287e1b22b8..1e82918bc9847f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4899,7 +4899,7 @@ // _LOAD_CONST { _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE_EVENT @@ -5911,7 +5911,7 @@ INSTRUCTION_STATS(LOAD_CONST); _PyStackRef value; _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[0] = value; stack_pointer += 1; @@ -6971,7 +6971,7 @@ // _LOAD_CONST { _PyFrame_SetStackPointer(frame, stack_pointer); - value = _PyStackRef_FromPyObjectWithCount(GETITEM(FRAME_CO_CONSTS, oparg)); + value = _PyStackRef_FromPyObjectNew(GETITEM(FRAME_CO_CONSTS, oparg)); stack_pointer = _PyFrame_GetStackPointer(frame); } // _RETURN_VALUE From a90e6440ba63697db03e9cb241ccf6bded5fc441 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 15 Oct 2024 10:41:58 +0100 Subject: [PATCH 06/20] Fix _PyFrame_Copy and add some 
asserts to make reference heap safe --- Include/internal/pycore_frame.h | 20 ++++++++++------ Include/internal/pycore_stackref.h | 35 ++++++++++++++++++++++++++++ Python/bytecodes.c | 17 ++++++++++++++ Python/executor_cases.c.h | 37 ++++++++++++++++++++++++++++++ Python/frame.c | 23 +++++++++---------- Python/generated_cases.c.h | 20 ++++++++++++++++ Tools/cases_generator/analyzer.py | 2 ++ 7 files changed, 135 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index c9ac3819d0390b..fda6c7d79aff47 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -124,17 +124,23 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) { - *dest = *src; + dest->f_executable = PyStackRef_HeapSafe(src->f_executable); + // Don't leave a dangling pointer to the old frame when creating generators + // and coroutines: + dest->previous = NULL; + dest->f_funcobj = PyStackRef_HeapSafe(src->f_funcobj); + dest->f_globals = src->f_globals; + dest->f_builtins = src->f_builtins; + dest->f_locals = src->f_locals; + dest->frame_obj = src->frame_obj; + dest->instr_ptr = src->instr_ptr; assert(src->stackpointer != NULL); int stacktop = (int)(src->stackpointer - src->localsplus); - assert(stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); + assert(stacktop >= 0); dest->stackpointer = dest->localsplus + stacktop; - for (int i = 1; i < stacktop; i++) { - dest->localsplus[i] = src->localsplus[i]; + for (int i = 0; i < stacktop; i++) { + dest->localsplus[i] = PyStackRef_HeapSafe(src->localsplus[i]); } - // Don't leave a dangling pointer to the old frame when creating generators - // and coroutines: - dest->previous = NULL; #ifdef Py_GIL_DISABLED PyCodeObject *co = _PyFrame_GetCode(dest); diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 335b2935fe9fdf..7d4811736bbfab 100644 --- 
a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -154,6 +154,18 @@ PyStackRef_DUP(_PyStackRef stackref) return stackref; } +static inline int +PyStackRef_IsHeapSafe(_PyStackRef ref) +{ + return 1; +} + +static inline _PyStackRef +PyStackRef_HeapSafe(_PyStackRef ref) +{ + return ref; +} + // Convert a possibly deferred reference to a strong reference. static inline _PyStackRef PyStackRef_AsStrongReference(_PyStackRef stackref) @@ -264,6 +276,29 @@ PyStackRef_DUP(_PyStackRef ref) return ref; } +static inline int +PyStackRef_IsHeapSafe(_PyStackRef ref) +{ + return ( + PyStackRef_IsNull(ref) || + !PyStackRef_HasCount(ref) || + _Py_IsImmortal(PyStackRef_AsPyObjectBorrow(ref)) + ); +} + +static inline _PyStackRef +PyStackRef_HeapSafe(_PyStackRef ref) +{ + if (PyStackRef_HasCount(ref)) { + PyObject *obj = BITS_TO_PTR_MASKED(ref); + if (obj != NULL && !_Py_IsImmortal(obj)) { + Py_INCREF_MORTAL(obj); + ref.bits = (uintptr_t)obj; + } + } + return ref; +} + static inline void PyStackRef_CLOSE(_PyStackRef ref) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8952551927f7be..a4f142c119bd4c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -260,6 +260,10 @@ dummy_func( } replicate(8) inst(STORE_FAST, (value --)) { + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); DEAD(value); } @@ -269,6 +273,10 @@ dummy_func( }; inst(STORE_FAST_LOAD_FAST, (value1 -- value2)) { + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value1) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); @@ -277,6 +285,14 @@ dummy_func( } inst(STORE_FAST_STORE_FAST, (value2, value1 --)) { + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value1) + ); + assert( + ((_PyFrame_GetCode(frame)->co_flags & 
(CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value2) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); @@ -981,6 +997,7 @@ dummy_func( assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); DEAD(retval); SAVE_STACK(); assert(EMPTY()); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 48386d2788150e..4565d8d4307acc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -217,6 +217,10 @@ oparg = 0; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -228,6 +232,10 @@ oparg = 1; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -239,6 +247,10 @@ oparg = 2; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -250,6 +262,10 @@ oparg = 3; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -261,6 +277,10 @@ oparg = 4; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ 
-272,6 +292,10 @@ oparg = 5; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -283,6 +307,10 @@ oparg = 6; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -294,6 +322,10 @@ oparg = 7; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -304,6 +336,10 @@ _PyStackRef value; oparg = CURRENT_OPARG(); value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1258,6 +1294,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/frame.c b/Python/frame.c index 35e6c2d0a93333..f0fb0f6f2aee77 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -52,23 +52,19 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) assert(frame->owner != FRAME_OWNED_BY_CSTACK); assert(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT); assert(frame->owner != FRAME_CLEARED); - Py_ssize_t size = ((char*)frame->stackpointer) - (char *)frame; - memcpy((_PyInterpreterFrame *)f->_f_frame_data, frame, size); - frame = (_PyInterpreterFrame *)f->_f_frame_data; - frame->stackpointer = (_PyStackRef *)(((char *)frame) + size); 
- frame->f_executable = PyStackRef_DUP(frame->f_executable); - f->f_frame = frame; - frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; - if (_PyFrame_IsIncomplete(frame)) { + _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)f->_f_frame_data; + _PyFrame_Copy(frame, new_frame); + f->f_frame = new_frame; + new_frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; + if (_PyFrame_IsIncomplete(new_frame)) { // This may be a newly-created generator or coroutine frame. Since it's // dead anyways, just pretend that the first RESUME ran: - PyCodeObject *code = _PyFrame_GetCode(frame); - frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1; + PyCodeObject *code = _PyFrame_GetCode(new_frame); + new_frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1; } - assert(!_PyFrame_IsIncomplete(frame)); + assert(!_PyFrame_IsIncomplete(new_frame)); assert(f->f_back == NULL); _PyInterpreterFrame *prev = _PyFrame_GetFirstComplete(frame->previous); - frame->previous = NULL; if (prev) { assert(prev->owner != FRAME_OWNED_BY_CSTACK); /* Link PyFrameObjects.f_back and remove link through _PyInterpreterFrame.previous */ @@ -116,6 +112,9 @@ _PyFrame_ClearExceptCode(_PyInterpreterFrame *frame) PyFrameObject *f = frame->frame_obj; frame->frame_obj = NULL; if (Py_REFCNT(f) > 1) { + // Take ownership takes the reference to the executable, + // so we need to incref it. 
+ PyStackRef_AsPyObjectNew(frame->f_executable); take_ownership(f, frame); Py_DECREF(f); return; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1e82918bc9847f..d68e607a99d652 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4922,6 +4922,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -4967,6 +4968,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -6981,6 +6983,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); _PyFrame_SetStackPointer(frame, stack_pointer); assert(EMPTY()); _Py_LeaveRecursiveCallPy(tstate); @@ -7043,6 +7046,7 @@ assert(frame != &entry_frame); #endif _PyStackRef temp = retval; + assert(PyStackRef_IsHeapSafe(temp)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7501,6 +7505,10 @@ INSTRUCTION_STATS(STORE_FAST); _PyStackRef value; value = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value) + ); SETLOCAL(oparg, value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -7514,6 +7522,10 @@ _PyStackRef value1; _PyStackRef value2; value1 = stack_pointer[-1]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value1) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); @@ -7530,6 +7542,14 @@ _PyStackRef value1; value1 = stack_pointer[-1]; value2 = stack_pointer[-2]; + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + 
PyStackRef_IsHeapSafe(value1) + ); + assert( + ((_PyFrame_GetCode(frame)->co_flags & (CO_COROUTINE | CO_GENERATOR)) == 0) || + PyStackRef_IsHeapSafe(value2) + ); uint32_t oparg1 = oparg >> 4; uint32_t oparg2 = oparg & 15; SETLOCAL(oparg1, value1); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 381ad3a4e2082c..9d7aaa988bf467 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -546,7 +546,9 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", + "PyStackRef_HasCount", "PyStackRef_Is", + "PyStackRef_IsHeapSafe", "PyStackRef_IsNull", "PyStackRef_None", "PyStackRef_TYPE", From 83e0323be5ea7a757400cda9e9e289c6ba124cbc Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 16 Oct 2024 18:19:47 +0100 Subject: [PATCH 07/20] Improve efficiency of _PyStackRef_FromPyObjectNew a bit --- Include/internal/pycore_stackref.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 7d4811736bbfab..a1c5fba0db8cb7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -249,7 +249,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) if (_Py_IsDeferrable(obj)) { return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; } - Py_INCREF(obj); + Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; return ref; } From f03f745930554105f98a248723d1350b84462e26 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 29 Oct 2024 18:28:26 +0000 Subject: [PATCH 08/20] Make making heap safe more efficient --- Include/internal/pycore_stackref.h | 27 ++++++++++++++++----------- Include/refcount.h | 4 +++- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 
a1c5fba0db8cb7..007443762788af 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -195,18 +195,19 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL -#define Py_TAG_BITS 1 +#define Py_TAG_BITS 3 #define Py_TAG_REFCNT 1 +#define Py_TAG_IMMORTAL 3 #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -#define PyStackRef_NULL_BITS Py_TAG_REFCNT +#define PyStackRef_NULL_BITS Py_TAG_IMMORTAL static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) -#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) static inline int PyStackRef_HasCount(_PyStackRef ref) @@ -214,6 +215,12 @@ PyStackRef_HasCount(_PyStackRef ref) return ref.bits & Py_TAG_REFCNT; } +static inline int +PyStackRef_HasCountAndMortal(_PyStackRef ref) +{ + return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; +} + static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { @@ -238,7 +245,7 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); - unsigned int tag = _Py_IsDeferrable(obj) ? Py_TAG_REFCNT : 0; + unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_IMMORTAL : 0; _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); return ref; } @@ -289,12 +296,10 @@ PyStackRef_IsHeapSafe(_PyStackRef ref) static inline _PyStackRef PyStackRef_HeapSafe(_PyStackRef ref) { - if (PyStackRef_HasCount(ref)) { + if (PyStackRef_HasCountAndMortal(ref)) { PyObject *obj = BITS_TO_PTR_MASKED(ref); - if (obj != NULL && !_Py_IsImmortal(obj)) { - Py_INCREF_MORTAL(obj); - ref.bits = (uintptr_t)obj; - } + Py_INCREF_MORTAL(obj); + ref.bits = (uintptr_t)obj; } return ref; } diff --git a/Include/refcount.h b/Include/refcount.h index 1a5265e27b0c01..39d606ffbc6b84 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -377,7 +377,9 @@ static inline void Py_DECREF_MORTAL(const char *filename, int lineno, PyObject * _Py_NegativeRefcount(filename, lineno, op); } _Py_DECREF_STAT_INC(); - _Py_DECREF_DecRefTotal(); + if (!_Py_IsImmortal(op)) { + _Py_DECREF_DecRefTotal(); + } if (--op->ob_refcnt == 0) { _Py_Dealloc(op); } From ff9d044ca863c1b982bb12eb6834c1d1dffcc2c4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 15:18:45 +0000 Subject: [PATCH 09/20] Fix up after merge --- Include/internal/pycore_frame.h | 3 +- Include/internal/pycore_stackref.h | 62 +++++++++++++++++++++++------- Python/ceval.c | 55 ++++++++++++++++++++++---- Python/ceval_macros.h | 1 + Python/frame.c | 4 +- 5 files changed, 101 insertions(+), 24 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 1d6c51dc24dbac..1e434e9ed43d52 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -157,6 +157,7 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * dest->stackpointer = dest->localsplus + stacktop; for (int i = 0; i < stacktop; i++) { dest->localsplus[i] = PyStackRef_HeapSafe(src->localsplus[i]); + PyStackRef_CheckValid(dest->localsplus[i]); } #ifdef Py_GIL_DISABLED @@ -408,7 +409,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState 
*tstate, PyCodeObject *code, int PyAPI_FUNC(_PyInterpreterFrame *) _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, - PyObject *locals, _PyStackRef const* args, + PyObject *locals, _PyStackRef const *args, size_t argcount, PyObject *kwnames, _PyInterpreterFrame *previous); diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 0f83d2e7ad96f0..9d60ac698d3bb2 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -200,22 +200,49 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL #define Py_TAG_BITS 3 +#define Py_NULL_BIT 2 #define Py_TAG_REFCNT 1 -#define Py_TAG_IMMORTAL 3 #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -#define PyStackRef_NULL_BITS Py_TAG_IMMORTAL +#define PyStackRef_NULL_BITS (Py_TAG_REFCNT | Py_NULL_BIT) static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) -#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) -#define PyStackRef_IsTrue(ref) ((ref).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) -#define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) -#define PyStackRef_IsNone(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) +// #define PyStackRef_IsTrue(ref) ((ref).bits == 
(((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT)) +// #define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT)) +// #define PyStackRef_IsNone(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT)) + +/* We should be able to guarantee that the tag bits are set for immortal objects */ + +#define PyStackRef_IsTrue(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_TrueStruct)) +#define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) +// #define PyStackRef_IsNone(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) + + +static inline void PyStackRef_CheckValid(_PyStackRef ref) { + int tag = ref.bits & Py_TAG_BITS; + if (tag == PyStackRef_NULL_BITS) { + assert(ref.bits == PyStackRef_NULL_BITS); + } + else if (tag == 0) { + assert(!_Py_IsImmortal(BITS_TO_PTR_MASKED(ref))); + } +} + +static inline int +PyStackRef_IsNone(_PyStackRef ref) +{ + if ((ref.bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) { + assert ((ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT); + return 1; + } + return 0; +} static inline int PyStackRef_HasCount(_PyStackRef ref) @@ -224,7 +251,7 @@ PyStackRef_HasCount(_PyStackRef ref) } static inline int -PyStackRef_HasCountAndMortal(_PyStackRef ref) +PyStackRef_HasCountAndNotNull(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; } @@ -253,8 +280,9 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); - unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_IMMORTAL : 0; + unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_REFCNT : 0; _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); + PyStackRef_CheckValid(ref); return ref; } @@ -272,6 +300,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) } Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; + PyStackRef_CheckValid(ref); return ref; } #define PyStackRef_FromPyObjectNew(obj) _PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) @@ -310,11 +339,16 @@ PyStackRef_IsHeapSafe(_PyStackRef ref) static inline _PyStackRef PyStackRef_HeapSafe(_PyStackRef ref) { - if (PyStackRef_HasCountAndMortal(ref)) { - PyObject *obj = BITS_TO_PTR_MASKED(ref); - Py_INCREF_MORTAL(obj); - ref.bits = (uintptr_t)obj; + if (!PyStackRef_HasCountAndNotNull(ref)) { + return ref; + } + PyObject *obj = BITS_TO_PTR_MASKED(ref); + if (_Py_IsImmortal(obj)) { + return ref; } + Py_INCREF_MORTAL(obj); + ref.bits = (uintptr_t)obj; + PyStackRef_CheckValid(ref); return ref; } diff --git a/Python/ceval.c b/Python/ceval.c index 64652816f4c237..c181f762fba2ef 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1515,6 +1515,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, } assert(PyStackRef_IsNull(localsplus[i])); localsplus[i] = PyStackRef_FromPyObjectSteal(kwdict); + PyStackRef_CheckValid(localsplus[i]); } else { kwdict = NULL; @@ -1531,6 +1532,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, for (j = 0; j < n; j++) { assert(PyStackRef_IsNull(localsplus[j])); localsplus[j] = args[j]; + PyStackRef_CheckValid(localsplus[j]); } /* Pack other positional arguments into the *args argument */ @@ -1654,6 +1656,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, goto kw_fail; } localsplus[j] = value_stackref; + PyStackRef_CheckValid(localsplus[j]); } } @@ -1689,6 +1692,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, if (PyStackRef_AsPyObjectBorrow(localsplus[m+i]) == NULL) { PyObject *def = defs[i]; localsplus[m+i] = PyStackRef_FromPyObjectNew(def); + 
PyStackRef_CheckValid(localsplus[m+i]); } } } @@ -1708,6 +1712,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, } if (def) { localsplus[i] = PyStackRef_FromPyObjectSteal(def); + PyStackRef_CheckValid(localsplus[i]); continue; } } @@ -1835,12 +1840,27 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, _PyStackRef func, PyStackRef_CLOSE(func); goto error; } + size_t total_args = nargs + PyDict_GET_SIZE(kwargs); + for (size_t i = 0; i < total_args; i++) { + ((_PyStackRef *)newargs)[i] = PyStackRef_FromPyObjectSteal(newargs[i]); + } } else { - newargs = &PyTuple_GET_ITEM(callargs, 0); - /* We need to incref all our args since the new frame steals the references. */ - for (Py_ssize_t i = 0; i < nargs; ++i) { - Py_INCREF(PyTuple_GET_ITEM(callargs, i)); + if (nargs <= 8) { + PyObject *stack_array[8]; + newargs = stack_array; + } + else { + newargs = PyMem_Malloc(sizeof(PyObject *) *nargs); + if (newargs == NULL) { + PyErr_NoMemory(); + PyStackRef_CLOSE(func); + goto error; + } + } + /* We need to tag all our args since the new frame steals the references. */ + for (Py_ssize_t i = 0; i < nargs; i++) { + ((_PyStackRef *)newargs)[i] = PyStackRef_FromPyObjectNew(PyTuple_GET_ITEM(callargs, i)); } } _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit( @@ -1850,6 +1870,9 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, _PyStackRef func, if (has_dict) { _PyStack_UnpackDict_FreeNoDecRef(newargs, kwnames); } + else if (nargs > 8) { + PyMem_Free((void *)newargs); + } /* No need to decref func here because the reference has been stolen by _PyEvalFramePushAndInit. 
*/ @@ -1868,21 +1891,39 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, PyObject* const* args, size_t argcount, PyObject *kwnames) { + size_t total_args = argcount; + if (kwnames) { + total_args += PyTuple_GET_SIZE(kwnames); + } + _PyStackRef *arguments; + if (total_args <= 8) { + _PyStackRef stack_array[8]; + arguments = stack_array; + } + else { + arguments = PyMem_Malloc(sizeof(_PyStackRef) * total_args); + if (arguments == NULL) { + return PyErr_NoMemory(); + } + } /* _PyEvalFramePushAndInit consumes the references * to func, locals and all its arguments */ Py_XINCREF(locals); for (size_t i = 0; i < argcount; i++) { - Py_INCREF(args[i]); + arguments[i] = _PyStackRef_FromPyObjectNew(args[i]); } if (kwnames) { Py_ssize_t kwcount = PyTuple_GET_SIZE(kwnames); for (Py_ssize_t i = 0; i < kwcount; i++) { - Py_INCREF(args[i+argcount]); + arguments[i+argcount] = _PyStackRef_FromPyObjectNew(args[i+argcount]); } } _PyInterpreterFrame *frame = _PyEvalFramePushAndInit( tstate, PyStackRef_FromPyObjectNew(func), locals, - (_PyStackRef const *)args, argcount, kwnames, NULL); + arguments, argcount, kwnames, NULL); + if (total_args > 8) { + PyMem_Free(arguments); + } if (frame == NULL) { return NULL; } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 61534492fa92b9..75e20e374f4fbc 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -257,6 +257,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { variable would be pointing to already-freed memory. */ #define SETLOCAL(i, value) do { _PyStackRef tmp = GETLOCAL(i); \ GETLOCAL(i) = value; \ + PyStackRef_CheckValid(value); \ PyStackRef_XCLOSE(tmp); } while (0) #define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op) diff --git a/Python/frame.c b/Python/frame.c index 79d3ccb7966a46..592853970bfb2b 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -60,8 +60,8 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) // This may be a newly-created generator or coroutine frame. 
Since it's // dead anyways, just pretend that the first RESUME ran: PyCodeObject *code = _PyFrame_GetCode(new_frame); - frame->instr_ptr = - _PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1; + new_frame->instr_ptr = + _PyFrame_GetBytecode(new_frame) + code->_co_firsttraceable + 1; } assert(!_PyFrame_IsIncomplete(new_frame)); assert(f->f_back == NULL); From b7a8b5d18094287018e6c01880c410597ec6c422 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 4 Dec 2024 10:56:24 +0000 Subject: [PATCH 10/20] Restore immortality bit --- Include/internal/pycore_frame.h | 6 +-- Include/internal/pycore_stackref.h | 81 ++++++++++++++++-------------- 2 files changed, 46 insertions(+), 41 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 89907bcaea1e0c..1d35075b7486e8 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -142,11 +142,11 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) { - dest->f_executable = PyStackRef_HeapSafe(src->f_executable); + dest->f_executable = PyStackRef_MakeHeapSafe(src->f_executable); // Don't leave a dangling pointer to the old frame when creating generators // and coroutines: dest->previous = NULL; - dest->f_funcobj = PyStackRef_HeapSafe(src->f_funcobj); + dest->f_funcobj = PyStackRef_MakeHeapSafe(src->f_funcobj); dest->f_globals = src->f_globals; dest->f_builtins = src->f_builtins; dest->f_locals = src->f_locals; @@ -157,7 +157,7 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * assert(stacktop >= 0); dest->stackpointer = dest->localsplus + stacktop; for (int i = 0; i < stacktop; i++) { - dest->localsplus[i] = PyStackRef_HeapSafe(src->localsplus[i]); + dest->localsplus[i] = PyStackRef_MakeHeapSafe(src->localsplus[i]); PyStackRef_CheckValid(dest->localsplus[i]); } diff --git a/Include/internal/pycore_stackref.h 
b/Include/internal/pycore_stackref.h index 9d60ac698d3bb2..3eb67674bc7bb7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -165,7 +165,7 @@ PyStackRef_IsHeapSafe(_PyStackRef ref) } static inline _PyStackRef -PyStackRef_HeapSafe(_PyStackRef ref) +PyStackRef_MakeHeapSafe(_PyStackRef ref) { return ref; } @@ -200,58 +200,73 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) // With GIL #define Py_TAG_BITS 3 -#define Py_NULL_BIT 2 +#define Py_TAG_IMMORTAL 3 #define Py_TAG_REFCNT 1 #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) #define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) -#define PyStackRef_NULL_BITS (Py_TAG_REFCNT | Py_NULL_BIT) +#define PyStackRef_NULL_BITS Py_TAG_IMMORTAL static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsNull(ref) ((ref).bits == PyStackRef_NULL_BITS) -#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT }) -#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT }) -#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT }) +#define PyStackRef_True ((_PyStackRef){.bits = ((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_False ((_PyStackRef){.bits = ((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL }) +#define PyStackRef_None ((_PyStackRef){.bits = ((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL }) -// #define PyStackRef_IsTrue(ref) ((ref).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_REFCNT)) -// #define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_REFCNT)) -// #define PyStackRef_IsNone(ref) ((ref).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_REFCNT)) +// #define PyStackRef_IsTrue(ref) ((ref).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) +// #define PyStackRef_IsFalse(ref) ((ref).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL)) +// 
#define /* We should be able to guarantee that the tag bits are set for immortal objects */ #define PyStackRef_IsTrue(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_TrueStruct)) #define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) -// #define PyStackRef_IsNone(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) static inline void PyStackRef_CheckValid(_PyStackRef ref) { int tag = ref.bits & Py_TAG_BITS; - if (tag == PyStackRef_NULL_BITS) { - assert(ref.bits == PyStackRef_NULL_BITS); - } - else if (tag == 0) { - assert(!_Py_IsImmortal(BITS_TO_PTR_MASKED(ref))); + PyObject *obj = BITS_TO_PTR_MASKED(ref); + switch (tag) { + case 0: + assert(!_Py_IsImmortal(obj)); + break; + case Py_TAG_REFCNT: + /* Can be immortal if object was made immortal after reference came into existence */ + assert(obj != NULL && obj != Py_True && obj != Py_False && obj != Py_None); + break; + case Py_TAG_IMMORTAL: + assert(obj == NULL || _Py_IsImmortal(obj)); + break; + default: + assert(0); } } +#ifdef Py_DEBUG static inline int PyStackRef_IsNone(_PyStackRef ref) { if ((ref.bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) { - assert ((ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT); + assert ((ref.bits & Py_TAG_BITS) == Py_TAG_IMMORTAL); return 1; } return 0; } +#else + +#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) + +#endif + static inline int PyStackRef_HasCount(_PyStackRef ref) { return ref.bits & Py_TAG_REFCNT; } -static inline int -PyStackRef_HasCountAndNotNull(_PyStackRef ref) +static inline bool +PyStackRef_HasCountAndMortal(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; } @@ -280,7 +295,7 @@ static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); - unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_REFCNT : 0; + unsigned int tag = _Py_IsImmortal(obj) ? 
Py_TAG_IMMORTAL : 0; _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) | tag}); PyStackRef_CheckValid(ref); return ref; @@ -295,8 +310,8 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { - if (_Py_IsDeferrable(obj)) { - return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; + if (_Py_IsImmortal(obj)) { + return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_IMMORTAL}; } Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; @@ -307,14 +322,11 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) /* Create a new reference from an object with an embedded reference count */ static inline _PyStackRef -_PyStackRef_FromPyObjectWithCount(PyObject *obj) +PyStackRef_FromPyObjectImmortal(PyObject *obj) { - return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; + assert(_Py_IsImmortal(obj)); + return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_IMMORTAL}; } -#define PyStackRef_FromPyObjectWithCount(obj) _PyStackRef_FromPyObjectWithCount(_PyObject_CAST(obj)) - -#define PyStackRef_FromPyObjectImmortal PyStackRef_FromPyObjectWithCount - static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) @@ -326,26 +338,19 @@ PyStackRef_DUP(_PyStackRef ref) return ref; } -static inline int +static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return ( - PyStackRef_IsNull(ref) || - !PyStackRef_HasCount(ref) || - _Py_IsImmortal(PyStackRef_AsPyObjectBorrow(ref)) - ); + return !PyStackRef_HasCountAndMortal(ref); } static inline _PyStackRef -PyStackRef_HeapSafe(_PyStackRef ref) +PyStackRef_MakeHeapSafe(_PyStackRef ref) { - if (!PyStackRef_HasCountAndNotNull(ref)) { + if (!PyStackRef_HasCountAndMortal(ref)) { return ref; } PyObject *obj = BITS_TO_PTR_MASKED(ref); - if (_Py_IsImmortal(obj)) { - return ref; - } Py_INCREF_MORTAL(obj); ref.bits = (uintptr_t)obj; PyStackRef_CheckValid(ref); From 49d0d4273850c30f6c97c1b9b23ae68545eca38c Mon Sep 17 00:00:00 2001 From: Mark 
Shannon Date: Wed, 4 Dec 2024 11:13:08 +0000 Subject: [PATCH 11/20] Fix validity check --- Include/internal/pycore_stackref.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 3eb67674bc7bb7..d656b1f0a02143 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -223,6 +223,8 @@ static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) +#ifdef Py_DEBUG + static inline void PyStackRef_CheckValid(_PyStackRef ref) { int tag = ref.bits & Py_TAG_BITS; PyObject *obj = BITS_TO_PTR_MASKED(ref); @@ -242,7 +244,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { } } -#ifdef Py_DEBUG static inline int PyStackRef_IsNone(_PyStackRef ref) { @@ -255,6 +256,7 @@ PyStackRef_IsNone(_PyStackRef ref) #else +#define PyStackRef_CheckValid(REF) ((void)0) #define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) #endif From 3a12d188a43862a188d27c0eccbaf8b8b1a0b89d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 14:25:39 +0000 Subject: [PATCH 12/20] Add PyStackRef_IsMortal --- Include/internal/pycore_stackref.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index d656b1f0a02143..1aff47a7c1ef65 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -267,12 +267,20 @@ PyStackRef_HasCount(_PyStackRef ref) return ref.bits & Py_TAG_REFCNT; } +/* Does this ref have an embedded refcount and refer to a mortal object (NULL is not mortal) */ static inline bool PyStackRef_HasCountAndMortal(_PyStackRef ref) { return (ref.bits & Py_TAG_BITS) == Py_TAG_REFCNT; } +/* Does this ref refer to a mortal object (NULL is not mortal) */ +static inline bool 
+PyStackRef_IsMortal(_PyStackRef ref) +{ + return (ref.bits & Py_TAG_BITS) != Py_TAG_IMMORTAL; +} + static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { From 497fb9fc13c16c35a21564f48c0ad2796609af69 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 15:28:35 +0000 Subject: [PATCH 13/20] Reduce overhead of stackrefs a tiny bit for mortal objects --- Include/internal/pycore_stackref.h | 15 ++++++++--- Python/bytecodes.c | 40 ++++++++++++--------------- Python/executor_cases.c.h | 43 +++++++++++++----------------- Python/generated_cases.c.h | 34 +++++++++++------------ Python/optimizer_bytecodes.c | 2 +- Tools/cases_generator/analyzer.py | 3 ++- 6 files changed, 65 insertions(+), 72 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 1aff47a7c1ef65..8424ac8183fb02 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -261,6 +261,7 @@ PyStackRef_IsNone(_PyStackRef ref) #endif +/* Does this ref have an embedded refcount */ static inline int PyStackRef_HasCount(_PyStackRef ref) { @@ -298,9 +299,6 @@ PyStackRef_AsPyObjectSteal(_PyStackRef ref) } } -/* We will want to extend this to a larger set of objects in the future */ -#define _Py_IsDeferrable _Py_IsImmortal - static inline _PyStackRef PyStackRef_FromPyObjectSteal(PyObject *obj) { @@ -311,6 +309,16 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) return ref; } +static inline _PyStackRef +PyStackRef_FromPyObjectStealMortal(PyObject *obj) +{ + assert(obj != NULL); + assert(!_Py_IsImmortal(obj)); + _PyStackRef ref = ((_PyStackRef){.bits = ((uintptr_t)(obj)) }); + PyStackRef_CheckValid(ref); + return ref; +} + // Check if a stackref is exactly the same as another stackref, including the // the deferred bit. This can only be used safely if you know that the deferred // bits of `a` and `b` match. 
@@ -320,6 +328,7 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { + assert(obj != NULL); if (_Py_IsImmortal(obj)) { return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_IMMORTAL}; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7c7053257de7ab..0b67e7b0390787 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -837,7 +837,7 @@ dummy_func( PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectImmortal(res_o); } inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, tuple_st, sub_st -- res)) { @@ -1662,8 +1662,7 @@ dummy_func( int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed); #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -1679,8 +1678,7 @@ dummy_func( int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed); #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -1718,7 +1716,7 @@ dummy_func( if (cell == NULL) { ERROR_NO_POP(); } - SETLOCAL(oparg, PyStackRef_FromPyObjectSteal(cell)); + SETLOCAL(oparg, PyStackRef_FromPyObjectStealMortal(cell)); } inst(DELETE_DEREF, (--)) { @@ -1800,17 +1798,18 @@ dummy_func( } inst(BUILD_TUPLE, (values[oparg] -- tup)) { + assert(oparg != 0); PyObject *tup_o = _PyTuple_FromStackRefSteal(values, oparg); INPUTS_DEAD(); ERROR_IF(tup_o == NULL, error); - tup = PyStackRef_FromPyObjectSteal(tup_o); + tup = PyStackRef_FromPyObjectStealMortal(tup_o); } inst(BUILD_LIST, (values[oparg] -- list)) { PyObject *list_o = _PyList_FromStackRefSteal(values, oparg); INPUTS_DEAD(); ERROR_IF(list_o == NULL, 
error); - list = PyStackRef_FromPyObjectSteal(list_o); + list = PyStackRef_FromPyObjectStealMortal(list_o); } inst(LIST_EXTEND, (list_st, unused[oparg-1], iterable_st -- list_st, unused[oparg-1])) { @@ -1860,7 +1859,7 @@ dummy_func( Py_DECREF(set_o); ERROR_IF(true, error); } - set = PyStackRef_FromPyObjectSteal(set_o); + set = PyStackRef_FromPyObjectStealMortal(set_o); } inst(BUILD_MAP, (values[oparg*2] -- map)) { @@ -1876,7 +1875,7 @@ dummy_func( STACKREFS_TO_PYOBJECTS_CLEANUP(values_o); DECREF_INPUTS(); ERROR_IF(map_o == NULL, error); - map = PyStackRef_FromPyObjectSteal(map_o); + map = PyStackRef_FromPyObjectStealMortal(map_o); } inst(SETUP_ANNOTATIONS, (--)) { @@ -2158,9 +2157,8 @@ dummy_func( PyObject *attr_o = *value_ptr; DEOPT_IF(attr_o == NULL); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); DECREF_INPUTS(); } @@ -2188,8 +2186,7 @@ dummy_func( PyObject *attr_o = ep->me_value; DEOPT_IF(attr_o == NULL); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; DECREF_INPUTS(); } @@ -2222,8 +2219,7 @@ dummy_func( attr_o = ep->me_value; DEOPT_IF(attr_o == NULL); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; DECREF_INPUTS(); } @@ -3681,7 +3677,7 @@ dummy_func( DEOPT_IF(callable_o != (PyObject *)&PyType_Type); DEAD(callable); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(Py_NewRef(Py_TYPE(arg_o))); + res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); PyStackRef_CLOSE(arg); } @@ -4596,7 +4592,7 @@ dummy_func( frame = tstate->current_frame = prev; LOAD_IP(frame->return_offset); RELOAD_STACK(); - res = PyStackRef_FromPyObjectSteal((PyObject *)gen); + res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); 
LLTRACE_RESUME_FRAME(); } @@ -4608,7 +4604,7 @@ dummy_func( PyObject *slice_o = PySlice_New(start_o, stop_o, step_o); DECREF_INPUTS(); ERROR_IF(slice_o == NULL, error); - slice = PyStackRef_FromPyObjectSteal(slice_o); + slice = PyStackRef_FromPyObjectStealMortal(slice_o); } inst(CONVERT_VALUE, (value -- result)) { @@ -4955,8 +4951,7 @@ dummy_func( PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL); - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; } @@ -4965,8 +4960,7 @@ dummy_func( PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); PyObject *res_o = entries[index].me_value; DEOPT_IF(res_o == NULL); - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2a98a44cd9eb9a..cc407a2f0efd13 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1081,7 +1081,7 @@ PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectImmortal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -2006,8 +2006,7 @@ JUMP_TO_JUMP_TARGET(); } #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -2040,8 +2039,7 @@ JUMP_TO_JUMP_TARGET(); } #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -2077,7 +2075,7 @@ if (cell == NULL) { JUMP_TO_ERROR(); } - SETLOCAL(oparg, PyStackRef_FromPyObjectSteal(cell)); + 
SETLOCAL(oparg, PyStackRef_FromPyObjectStealMortal(cell)); break; } @@ -2207,9 +2205,10 @@ _PyStackRef tup; oparg = CURRENT_OPARG(); values = &stack_pointer[-oparg]; + assert(oparg != 0); PyObject *tup_o = _PyTuple_FromStackRefSteal(values, oparg); if (tup_o == NULL) JUMP_TO_ERROR(); - tup = PyStackRef_FromPyObjectSteal(tup_o); + tup = PyStackRef_FromPyObjectStealMortal(tup_o); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -2223,7 +2222,7 @@ values = &stack_pointer[-oparg]; PyObject *list_o = _PyList_FromStackRefSteal(values, oparg); if (list_o == NULL) JUMP_TO_ERROR(); - list = PyStackRef_FromPyObjectSteal(list_o); + list = PyStackRef_FromPyObjectStealMortal(list_o); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -2309,7 +2308,7 @@ Py_DECREF(set_o); if (true) JUMP_TO_ERROR(); } - set = PyStackRef_FromPyObjectSteal(set_o); + set = PyStackRef_FromPyObjectStealMortal(set_o); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -2339,7 +2338,7 @@ PyStackRef_CLOSE(values[_i]); } if (map_o == NULL) JUMP_TO_ERROR(); - map = PyStackRef_FromPyObjectSteal(map_o); + map = PyStackRef_FromPyObjectStealMortal(map_o); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; assert(WITHIN_STACK_BOUNDS()); @@ -2643,9 +2642,8 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; break; @@ -2666,9 +2664,8 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; stack_pointer[0] = null; @@ -2715,8 +2712,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = 
PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; @@ -2770,8 +2766,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; @@ -4441,7 +4436,7 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(Py_NewRef(Py_TYPE(arg_o))); + res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); PyStackRef_CLOSE(arg); stack_pointer[-3] = res; stack_pointer += -2; @@ -5525,7 +5520,7 @@ frame = tstate->current_frame = prev; LOAD_IP(frame->return_offset); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectSteal((PyObject *)gen); + res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; @@ -5550,7 +5545,7 @@ PyStackRef_CLOSE(stop); PyStackRef_XCLOSE(step); if (slice_o == NULL) JUMP_TO_ERROR(); - slice = PyStackRef_FromPyObjectSteal(slice_o); + slice = PyStackRef_FromPyObjectStealMortal(slice_o); stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; stack_pointer += -1 - ((oparg == 3) ? 
1 : 0); assert(WITHIN_STACK_BOUNDS()); @@ -5927,8 +5922,7 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; @@ -5949,8 +5943,7 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); null = PyStackRef_NULL; stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7e82d476c019d2..e1aa9530bd66b3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -623,7 +623,7 @@ PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectImmortal(res_o); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -675,7 +675,7 @@ assert(WITHIN_STACK_BOUNDS()); goto error; } - list = PyStackRef_FromPyObjectSteal(list_o); + list = PyStackRef_FromPyObjectStealMortal(list_o); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -715,7 +715,7 @@ assert(WITHIN_STACK_BOUNDS()); goto error; } - map = PyStackRef_FromPyObjectSteal(map_o); + map = PyStackRef_FromPyObjectStealMortal(map_o); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; assert(WITHIN_STACK_BOUNDS()); @@ -759,7 +759,7 @@ goto error; } } - set = PyStackRef_FromPyObjectSteal(set_o); + set = PyStackRef_FromPyObjectStealMortal(set_o); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -789,7 +789,7 @@ assert(WITHIN_STACK_BOUNDS()); goto error; } - slice = PyStackRef_FromPyObjectSteal(slice_o); + slice = 
PyStackRef_FromPyObjectStealMortal(slice_o); stack_pointer[-2 - ((oparg == 3) ? 1 : 0)] = slice; stack_pointer += -1 - ((oparg == 3) ? 1 : 0); assert(WITHIN_STACK_BOUNDS()); @@ -838,13 +838,14 @@ _PyStackRef *values; _PyStackRef tup; values = &stack_pointer[-oparg]; + assert(oparg != 0); PyObject *tup_o = _PyTuple_FromStackRefSteal(values, oparg); if (tup_o == NULL) { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; } - tup = PyStackRef_FromPyObjectSteal(tup_o); + tup = PyStackRef_FromPyObjectStealMortal(tup_o); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; assert(WITHIN_STACK_BOUNDS()); @@ -3093,7 +3094,7 @@ DEOPT_IF(!PyStackRef_IsNull(null), CALL); DEOPT_IF(callable_o != (PyObject *)&PyType_Type, CALL); STAT_INC(CALL, hit); - res = PyStackRef_FromPyObjectSteal(Py_NewRef(Py_TYPE(arg_o))); + res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); PyStackRef_CLOSE(arg); stack_pointer[-3] = res; stack_pointer += -2; @@ -5423,9 +5424,8 @@ PyObject *attr_o = *value_ptr; DEOPT_IF(attr_o == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); null = PyStackRef_NULL; - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); PyStackRef_CLOSE(owner); } /* Skip 5 cache entries */ @@ -5593,8 +5593,7 @@ PyObject *attr_o = ep->me_value; DEOPT_IF(attr_o == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); } @@ -5820,8 +5819,7 @@ attr_o = ep->me_value; DEOPT_IF(attr_o == NULL, LOAD_ATTR); STAT_INC(LOAD_ATTR, hit); - Py_INCREF(attr_o); - attr = PyStackRef_FromPyObjectSteal(attr_o); + attr = PyStackRef_FromPyObjectNew(attr_o); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); } @@ -6183,8 +6181,7 @@ int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed, LOAD_GLOBAL); #else - Py_INCREF(res_o); - res = 
PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -6226,8 +6223,7 @@ int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); DEOPT_IF(!increfed, LOAD_GLOBAL); #else - Py_INCREF(res_o); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; @@ -6520,7 +6516,7 @@ if (cell == NULL) { goto error; } - SETLOCAL(oparg, PyStackRef_FromPyObjectSteal(cell)); + SETLOCAL(oparg, PyStackRef_FromPyObjectStealMortal(cell)); DISPATCH(); } @@ -7027,7 +7023,7 @@ frame = tstate->current_frame = prev; LOAD_IP(frame->return_offset); stack_pointer = _PyFrame_GetStackPointer(frame); - res = PyStackRef_FromPyObjectSteal((PyObject *)gen); + res = PyStackRef_FromPyObjectStealMortal((PyObject *)gen); LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 42bdbd9ca8d0cd..1cae6b4d508abb 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -17,7 +17,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type -#define sym_is_null _Py_uop_sym_is_null +#define sym_is_null _Py_uop_sym_is_nullGUARD_BUILTINS_VERSION #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6030570f49f90a..37653d0e6dda00 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -384,7 +384,7 @@ def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]: """Find the tokens that make up the left-hand side of an assignment""" offset = 0 
for tkn in reversed(node.block.tokens[: idx]): - if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}: + if tkn.kind in {"SEMI", "LBRACE", "RBRACE", "CMACRO"}: return node.block.tokens[idx - offset : idx] offset += 1 return [] @@ -548,6 +548,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_FromPyObjectImmortal", "PyStackRef_FromPyObjectNew", "PyStackRef_FromPyObjectSteal", + "PyStackRef_FromPyObjectStealMortal", "PyStackRef_HasCount", "PyStackRef_IsNone", "PyStackRef_Is", From 806c28ab94c216a3e763a72403e604bf6ba01c8d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 5 Dec 2024 18:15:48 +0000 Subject: [PATCH 14/20] Attempt to use tagged RC for a bit of extra speed --- Include/internal/pycore_opcode_metadata.h | 13 ++- Include/internal/pycore_stackref.h | 6 + Include/internal/pycore_uop_ids.h | 105 +++++++++-------- Include/internal/pycore_uop_metadata.h | 37 ++++++ Include/opcode_ids.h | 63 +++++----- Lib/_opcode_metadata.py | 63 +++++----- Python/bytecodes.c | 6 + Python/executor_cases.c.h | 134 ++++++++++++++++++++++ Python/flowgraph.c | 7 ++ Python/generated_cases.c.h | 16 +++ Python/opcode_targets.h | 2 +- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 9 ++ 13 files changed, 350 insertions(+), 113 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index ed696785599da4..9f3f96c509d59c 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -327,6 +327,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_LOAD_FAST: return 0; + case LOAD_FAST_WITH_COUNT: + return 0; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -786,6 +788,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_LOAD_FAST: return 2; + case LOAD_FAST_WITH_COUNT: + return 1; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -1555,6 +1559,10 @@ int 
_PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { *effect = 2; return 0; } + case LOAD_FAST_WITH_COUNT: { + *effect = 1; + return 0; + } case LOAD_FROM_DICT_OR_DEREF: { *effect = 0; return 0; @@ -2080,6 +2088,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_WITH_COUNT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -2286,6 +2295,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, 0, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, 0, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, + [LOAD_FAST_WITH_COUNT] = { .nuops = 1, .uops = { { _LOAD_FAST_WITH_COUNT, 0, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION_PUSH_KEYS, 1, 2 }, { _LOAD_GLOBAL_BUILTINS_FROM_KEYS, 1, 3 } } }, @@ -2504,6 +2514,7 @@ const char *_PyOpcode_OpName[266] = { [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = 
"LOAD_FAST_LOAD_FAST", + [LOAD_FAST_WITH_COUNT] = "LOAD_FAST_WITH_COUNT", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", [LOAD_FROM_DICT_OR_GLOBALS] = "LOAD_FROM_DICT_OR_GLOBALS", [LOAD_GLOBAL] = "LOAD_GLOBAL", @@ -2755,6 +2766,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, + [LOAD_FAST_WITH_COUNT] = LOAD_FAST_WITH_COUNT, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, [LOAD_FROM_DICT_OR_GLOBALS] = LOAD_FROM_DICT_OR_GLOBALS, [LOAD_GLOBAL] = LOAD_GLOBAL, @@ -2833,7 +2845,6 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ - case 116: \ case 117: \ case 118: \ case 119: \ diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 8424ac8183fb02..209bda6e6be3a6 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -347,6 +347,12 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_IMMORTAL}; } +static inline _PyStackRef +PyStackRef_WithCount(_PyStackRef ref) +{ + return (_PyStackRef){ .bits = ref.bits | Py_TAG_REFCNT }; +} + static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index fab4ce6a25b347..2b213a5b070ce2 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -215,86 +215,95 @@ extern "C" { #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST +#define _LOAD_FAST_WITH_COUNT 440 +#define _LOAD_FAST_WITH_COUNT_0 441 +#define _LOAD_FAST_WITH_COUNT_1 442 +#define _LOAD_FAST_WITH_COUNT_2 443 +#define _LOAD_FAST_WITH_COUNT_3 444 +#define _LOAD_FAST_WITH_COUNT_4 445 +#define _LOAD_FAST_WITH_COUNT_5 446 +#define _LOAD_FAST_WITH_COUNT_6 447 +#define 
_LOAD_FAST_WITH_COUNT_7 448 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 440 -#define _LOAD_GLOBAL_BUILTINS 441 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 442 -#define _LOAD_GLOBAL_MODULE 443 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 444 +#define _LOAD_GLOBAL 449 +#define _LOAD_GLOBAL_BUILTINS 450 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 451 +#define _LOAD_GLOBAL_MODULE 452 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 453 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 445 -#define _LOAD_SMALL_INT_0 446 -#define _LOAD_SMALL_INT_1 447 -#define _LOAD_SMALL_INT_2 448 -#define _LOAD_SMALL_INT_3 449 +#define _LOAD_SMALL_INT 454 +#define _LOAD_SMALL_INT_0 455 +#define _LOAD_SMALL_INT_1 456 +#define _LOAD_SMALL_INT_2 457 +#define _LOAD_SMALL_INT_3 458 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 450 +#define _MAKE_CALLARGS_A_TUPLE 459 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 451 +#define _MAKE_WARM 460 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 452 -#define _MAYBE_EXPAND_METHOD_KW 453 -#define _MONITOR_CALL 454 -#define _MONITOR_JUMP_BACKWARD 455 -#define _MONITOR_RESUME 456 +#define _MAYBE_EXPAND_METHOD 461 +#define _MAYBE_EXPAND_METHOD_KW 462 +#define _MONITOR_CALL 463 +#define _MONITOR_JUMP_BACKWARD 464 +#define _MONITOR_RESUME 465 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 457 -#define _POP_JUMP_IF_TRUE 458 +#define _POP_JUMP_IF_FALSE 466 +#define _POP_JUMP_IF_TRUE 467 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 459 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 468 #define 
_PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 460 +#define _PUSH_FRAME 469 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 461 -#define _PY_FRAME_KW 462 -#define _QUICKEN_RESUME 463 -#define _REPLACE_WITH_TRUE 464 +#define _PY_FRAME_GENERAL 470 +#define _PY_FRAME_KW 471 +#define _QUICKEN_RESUME 472 +#define _REPLACE_WITH_TRUE 473 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 465 -#define _SEND 466 -#define _SEND_GEN_FRAME 467 +#define _SAVE_RETURN_OFFSET 474 +#define _SEND 475 +#define _SEND_GEN_FRAME 476 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 468 -#define _STORE_ATTR 469 -#define _STORE_ATTR_INSTANCE_VALUE 470 -#define _STORE_ATTR_SLOT 471 -#define _STORE_ATTR_WITH_HINT 472 +#define _START_EXECUTOR 477 +#define _STORE_ATTR 478 +#define _STORE_ATTR_INSTANCE_VALUE 479 +#define _STORE_ATTR_SLOT 480 +#define _STORE_ATTR_WITH_HINT 481 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 473 -#define _STORE_FAST_0 474 -#define _STORE_FAST_1 475 -#define _STORE_FAST_2 476 -#define _STORE_FAST_3 477 -#define _STORE_FAST_4 478 -#define _STORE_FAST_5 479 -#define _STORE_FAST_6 480 -#define _STORE_FAST_7 481 +#define _STORE_FAST 482 +#define _STORE_FAST_0 483 +#define _STORE_FAST_1 484 +#define _STORE_FAST_2 485 +#define _STORE_FAST_3 486 +#define _STORE_FAST_4 487 +#define _STORE_FAST_5 488 +#define _STORE_FAST_6 489 +#define _STORE_FAST_7 490 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 482 -#define _STORE_SUBSCR 483 +#define _STORE_SLICE 491 +#define _STORE_SUBSCR 492 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP 
SWAP -#define _TIER2_RESUME_CHECK 484 -#define _TO_BOOL 485 +#define _TIER2_RESUME_CHECK 493 +#define _TO_BOOL 494 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -304,13 +313,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 486 +#define _UNPACK_SEQUENCE 495 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 486 +#define MAX_UOP_ID 495 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 3becf27422f705..bd6ba7a18f8a2f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,6 +33,15 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_0] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_1] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_2] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_3] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_4] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_5] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_6] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT_7] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_WITH_COUNT] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = 
HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -296,6 +305,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, + [_LOAD_FAST_WITH_COUNT] = 8, [_LOAD_SMALL_INT] = 4, [_STORE_FAST] = 8, [_INIT_CALL_PY_EXACT_ARGS] = 5, @@ -487,6 +497,15 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", + [_LOAD_FAST_WITH_COUNT] = "_LOAD_FAST_WITH_COUNT", + [_LOAD_FAST_WITH_COUNT_0] = "_LOAD_FAST_WITH_COUNT_0", + [_LOAD_FAST_WITH_COUNT_1] = "_LOAD_FAST_WITH_COUNT_1", + [_LOAD_FAST_WITH_COUNT_2] = "_LOAD_FAST_WITH_COUNT_2", + [_LOAD_FAST_WITH_COUNT_3] = "_LOAD_FAST_WITH_COUNT_3", + [_LOAD_FAST_WITH_COUNT_4] = "_LOAD_FAST_WITH_COUNT_4", + [_LOAD_FAST_WITH_COUNT_5] = "_LOAD_FAST_WITH_COUNT_5", + [_LOAD_FAST_WITH_COUNT_6] = "_LOAD_FAST_WITH_COUNT_6", + [_LOAD_FAST_WITH_COUNT_7] = "_LOAD_FAST_WITH_COUNT_7", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", @@ -607,6 +626,24 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST: return 0; + case _LOAD_FAST_WITH_COUNT_0: + return 0; + case _LOAD_FAST_WITH_COUNT_1: + return 0; + case _LOAD_FAST_WITH_COUNT_2: + return 0; + case _LOAD_FAST_WITH_COUNT_3: + return 0; + case _LOAD_FAST_WITH_COUNT_4: + return 0; + case _LOAD_FAST_WITH_COUNT_5: + return 0; + case _LOAD_FAST_WITH_COUNT_6: + return 0; + case _LOAD_FAST_WITH_COUNT_7: + return 0; + case _LOAD_FAST_WITH_COUNT: + return 0; case _LOAD_FAST_AND_CLEAR: return 0; case _LOAD_FAST_LOAD_FAST: diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index ce3d23eaa6d56d..aa363552b80e9b 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -95,37 +95,38 @@ extern "C" { #define LOAD_FAST_AND_CLEAR 82 
#define LOAD_FAST_CHECK 83 #define LOAD_FAST_LOAD_FAST 84 -#define LOAD_FROM_DICT_OR_DEREF 85 -#define LOAD_FROM_DICT_OR_GLOBALS 86 -#define LOAD_GLOBAL 87 -#define LOAD_NAME 88 -#define LOAD_SMALL_INT 89 -#define LOAD_SPECIAL 90 -#define LOAD_SUPER_ATTR 91 -#define MAKE_CELL 92 -#define MAP_ADD 93 -#define MATCH_CLASS 94 -#define POP_JUMP_IF_FALSE 95 -#define POP_JUMP_IF_NONE 96 -#define POP_JUMP_IF_NOT_NONE 97 -#define POP_JUMP_IF_TRUE 98 -#define RAISE_VARARGS 99 -#define RERAISE 100 -#define SEND 101 -#define SET_ADD 102 -#define SET_FUNCTION_ATTRIBUTE 103 -#define SET_UPDATE 104 -#define STORE_ATTR 105 -#define STORE_DEREF 106 -#define STORE_FAST 107 -#define STORE_FAST_LOAD_FAST 108 -#define STORE_FAST_STORE_FAST 109 -#define STORE_GLOBAL 110 -#define STORE_NAME 111 -#define SWAP 112 -#define UNPACK_EX 113 -#define UNPACK_SEQUENCE 114 -#define YIELD_VALUE 115 +#define LOAD_FAST_WITH_COUNT 85 +#define LOAD_FROM_DICT_OR_DEREF 86 +#define LOAD_FROM_DICT_OR_GLOBALS 87 +#define LOAD_GLOBAL 88 +#define LOAD_NAME 89 +#define LOAD_SMALL_INT 90 +#define LOAD_SPECIAL 91 +#define LOAD_SUPER_ATTR 92 +#define MAKE_CELL 93 +#define MAP_ADD 94 +#define MATCH_CLASS 95 +#define POP_JUMP_IF_FALSE 96 +#define POP_JUMP_IF_NONE 97 +#define POP_JUMP_IF_NOT_NONE 98 +#define POP_JUMP_IF_TRUE 99 +#define RAISE_VARARGS 100 +#define RERAISE 101 +#define SEND 102 +#define SET_ADD 103 +#define SET_FUNCTION_ATTRIBUTE 104 +#define SET_UPDATE 105 +#define STORE_ATTR 106 +#define STORE_DEREF 107 +#define STORE_FAST 108 +#define STORE_FAST_LOAD_FAST 109 +#define STORE_FAST_STORE_FAST 110 +#define STORE_GLOBAL 111 +#define STORE_NAME 112 +#define SWAP 113 +#define UNPACK_EX 114 +#define UNPACK_SEQUENCE 115 +#define YIELD_VALUE 116 #define RESUME 149 #define BINARY_OP_ADD_FLOAT 150 #define BINARY_OP_ADD_INT 151 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index cda3c340c322f3..43d4ef96ec0344 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -288,37 +288,38 
@@ 'LOAD_FAST_AND_CLEAR': 82, 'LOAD_FAST_CHECK': 83, 'LOAD_FAST_LOAD_FAST': 84, - 'LOAD_FROM_DICT_OR_DEREF': 85, - 'LOAD_FROM_DICT_OR_GLOBALS': 86, - 'LOAD_GLOBAL': 87, - 'LOAD_NAME': 88, - 'LOAD_SMALL_INT': 89, - 'LOAD_SPECIAL': 90, - 'LOAD_SUPER_ATTR': 91, - 'MAKE_CELL': 92, - 'MAP_ADD': 93, - 'MATCH_CLASS': 94, - 'POP_JUMP_IF_FALSE': 95, - 'POP_JUMP_IF_NONE': 96, - 'POP_JUMP_IF_NOT_NONE': 97, - 'POP_JUMP_IF_TRUE': 98, - 'RAISE_VARARGS': 99, - 'RERAISE': 100, - 'SEND': 101, - 'SET_ADD': 102, - 'SET_FUNCTION_ATTRIBUTE': 103, - 'SET_UPDATE': 104, - 'STORE_ATTR': 105, - 'STORE_DEREF': 106, - 'STORE_FAST': 107, - 'STORE_FAST_LOAD_FAST': 108, - 'STORE_FAST_STORE_FAST': 109, - 'STORE_GLOBAL': 110, - 'STORE_NAME': 111, - 'SWAP': 112, - 'UNPACK_EX': 113, - 'UNPACK_SEQUENCE': 114, - 'YIELD_VALUE': 115, + 'LOAD_FAST_WITH_COUNT': 85, + 'LOAD_FROM_DICT_OR_DEREF': 86, + 'LOAD_FROM_DICT_OR_GLOBALS': 87, + 'LOAD_GLOBAL': 88, + 'LOAD_NAME': 89, + 'LOAD_SMALL_INT': 90, + 'LOAD_SPECIAL': 91, + 'LOAD_SUPER_ATTR': 92, + 'MAKE_CELL': 93, + 'MAP_ADD': 94, + 'MATCH_CLASS': 95, + 'POP_JUMP_IF_FALSE': 96, + 'POP_JUMP_IF_NONE': 97, + 'POP_JUMP_IF_NOT_NONE': 98, + 'POP_JUMP_IF_TRUE': 99, + 'RAISE_VARARGS': 100, + 'RERAISE': 101, + 'SEND': 102, + 'SET_ADD': 103, + 'SET_FUNCTION_ATTRIBUTE': 104, + 'SET_UPDATE': 105, + 'STORE_ATTR': 106, + 'STORE_DEREF': 107, + 'STORE_FAST': 108, + 'STORE_FAST_LOAD_FAST': 109, + 'STORE_FAST_STORE_FAST': 110, + 'STORE_GLOBAL': 111, + 'STORE_NAME': 112, + 'SWAP': 113, + 'UNPACK_EX': 114, + 'UNPACK_SEQUENCE': 115, + 'YIELD_VALUE': 116, 'INSTRUMENTED_END_FOR': 237, 'INSTRUMENTED_END_SEND': 238, 'INSTRUMENTED_LOAD_SUPER_ATTR': 239, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0b67e7b0390787..ec64ff500c903b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -266,6 +266,12 @@ dummy_func( value = PyStackRef_DUP(GETLOCAL(oparg)); } + replicate(8) pure inst(LOAD_FAST_WITH_COUNT, (-- value)) { + _PyStackRef local = GETLOCAL(oparg); + 
assert(!PyStackRef_IsNull(local)); + value = PyStackRef_WithCount(local); + } + inst(LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); // do not use SETLOCAL here, it decrefs the old value diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cc407a2f0efd13..39d95e9158519d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -197,6 +197,140 @@ break; } + case _LOAD_FAST_WITH_COUNT_0: { + _PyStackRef value; + oparg = 0; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_1: { + _PyStackRef value; + oparg = 1; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_2: { + _PyStackRef value; + oparg = 2; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_3: { + _PyStackRef value; + oparg = 3; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = 
value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_4: { + _PyStackRef value; + oparg = 4; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_5: { + _PyStackRef value; + oparg = 5; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_6: { + _PyStackRef value; + oparg = 6; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT_7: { + _PyStackRef value; + oparg = 7; + assert(oparg == CURRENT_OPARG()); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _LOAD_FAST_WITH_COUNT: { + _PyStackRef value; + oparg = CURRENT_OPARG(); + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + 
stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { _PyStackRef value; oparg = CURRENT_OPARG(); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index b1097b64469ecd..b75dbda7b9b413 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1871,6 +1871,13 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) continue; } break; + case LOAD_FAST: + /* If the local is consumed immediately, we can use an empheral + * reference count */ + if (nextop == LOAD_ATTR) { + INSTR_SET_OP1(inst, LOAD_FAST_WITH_COUNT, oparg); + } + break; } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e1aa9530bd66b3..fd6f0bd2f5c6d8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5993,6 +5993,22 @@ DISPATCH(); } + TARGET(LOAD_FAST_WITH_COUNT) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(LOAD_FAST_WITH_COUNT); + _PyStackRef value; + _PyStackRef local = GETLOCAL(oparg); + assert(!PyStackRef_IsNull(local)); + _PyFrame_SetStackPointer(frame, stack_pointer); + value = PyStackRef_WithCount(local); + stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + TARGET(LOAD_FROM_DICT_OR_DEREF) { frame->instr_ptr = next_instr; next_instr += 1; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c93941dcac4abf..505c93604f95cc 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -84,6 +84,7 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_FAST_AND_CLEAR, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, + &&TARGET_LOAD_FAST_WITH_COUNT, &&TARGET_LOAD_FROM_DICT_OR_DEREF, &&TARGET_LOAD_FROM_DICT_OR_GLOBALS, &&TARGET_LOAD_GLOBAL, @@ -147,7 +148,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, 
&&_unknown_opcode, - &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 1cae6b4d508abb..42bdbd9ca8d0cd 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -17,7 +17,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type -#define sym_is_null _Py_uop_sym_is_nullGUARD_BUILTINS_VERSION +#define sym_is_null _Py_uop_sym_is_null #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index f77a5aa35bdf82..4e1249279f350d 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -47,6 +47,15 @@ break; } + case _LOAD_FAST_WITH_COUNT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + stack_pointer[0] = value; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _LOAD_FAST_AND_CLEAR: { _Py_UopsSymbol *value; value = GETLOCAL(oparg); From 0c20416a5b0d841e2e44c2db42305bc357cb1104 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 11:19:09 +0000 Subject: [PATCH 15/20] Revert addition of LOAD_FAST_WITH_COUNT --- Include/internal/pycore_opcode_metadata.h | 13 +-- Include/internal/pycore_uop_ids.h | 105 ++++++++--------- Include/internal/pycore_uop_metadata.h | 37 ------ Include/opcode_ids.h | 63 +++++----- Lib/_opcode_metadata.py | 63 +++++----- Python/bytecodes.c | 6 - Python/executor_cases.c.h | 134 ---------------------- Python/flowgraph.c | 7 -- Python/gc.c | 6 +- Python/generated_cases.c.h | 16 --- Python/opcode_targets.h | 2 +- Python/optimizer_cases.c.h | 9 -- 12 files changed, 114 insertions(+), 347 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h 
b/Include/internal/pycore_opcode_metadata.h index 9f3f96c509d59c..ed696785599da4 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -327,8 +327,6 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_FAST_LOAD_FAST: return 0; - case LOAD_FAST_WITH_COUNT: - return 0; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -788,8 +786,6 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_FAST_LOAD_FAST: return 2; - case LOAD_FAST_WITH_COUNT: - return 1; case LOAD_FROM_DICT_OR_DEREF: return 1; case LOAD_FROM_DICT_OR_GLOBALS: @@ -1559,10 +1555,6 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { *effect = 2; return 0; } - case LOAD_FAST_WITH_COUNT: { - *effect = 1; - return 0; - } case LOAD_FROM_DICT_OR_DEREF: { *effect = 0; return 0; @@ -2088,7 +2080,6 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, - [LOAD_FAST_WITH_COUNT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [LOAD_FROM_DICT_OR_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -2295,7 +2286,6 @@ _PyOpcode_macro_expansion[256] = { [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, 0, 0 } } }, [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, 0, 0 } } }, [LOAD_FAST_LOAD_FAST] = { .nuops = 2, 
.uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, - [LOAD_FAST_WITH_COUNT] = { .nuops = 1, .uops = { { _LOAD_FAST_WITH_COUNT, 0, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION_PUSH_KEYS, 1, 2 }, { _LOAD_GLOBAL_BUILTINS_FROM_KEYS, 1, 3 } } }, @@ -2514,7 +2504,6 @@ const char *_PyOpcode_OpName[266] = { [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST", - [LOAD_FAST_WITH_COUNT] = "LOAD_FAST_WITH_COUNT", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", [LOAD_FROM_DICT_OR_GLOBALS] = "LOAD_FROM_DICT_OR_GLOBALS", [LOAD_GLOBAL] = "LOAD_GLOBAL", @@ -2766,7 +2755,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, - [LOAD_FAST_WITH_COUNT] = LOAD_FAST_WITH_COUNT, [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, [LOAD_FROM_DICT_OR_GLOBALS] = LOAD_FROM_DICT_OR_GLOBALS, [LOAD_GLOBAL] = LOAD_GLOBAL, @@ -2845,6 +2833,7 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ + case 116: \ case 117: \ case 118: \ case 119: \ diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 2b213a5b070ce2..fab4ce6a25b347 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -215,95 +215,86 @@ extern "C" { #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST -#define _LOAD_FAST_WITH_COUNT 440 -#define _LOAD_FAST_WITH_COUNT_0 441 -#define _LOAD_FAST_WITH_COUNT_1 442 -#define _LOAD_FAST_WITH_COUNT_2 443 -#define _LOAD_FAST_WITH_COUNT_3 444 -#define 
_LOAD_FAST_WITH_COUNT_4 445 -#define _LOAD_FAST_WITH_COUNT_5 446 -#define _LOAD_FAST_WITH_COUNT_6 447 -#define _LOAD_FAST_WITH_COUNT_7 448 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 449 -#define _LOAD_GLOBAL_BUILTINS 450 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 451 -#define _LOAD_GLOBAL_MODULE 452 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 453 +#define _LOAD_GLOBAL 440 +#define _LOAD_GLOBAL_BUILTINS 441 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 442 +#define _LOAD_GLOBAL_MODULE 443 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 444 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 454 -#define _LOAD_SMALL_INT_0 455 -#define _LOAD_SMALL_INT_1 456 -#define _LOAD_SMALL_INT_2 457 -#define _LOAD_SMALL_INT_3 458 +#define _LOAD_SMALL_INT 445 +#define _LOAD_SMALL_INT_0 446 +#define _LOAD_SMALL_INT_1 447 +#define _LOAD_SMALL_INT_2 448 +#define _LOAD_SMALL_INT_3 449 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 459 +#define _MAKE_CALLARGS_A_TUPLE 450 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 460 +#define _MAKE_WARM 451 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 461 -#define _MAYBE_EXPAND_METHOD_KW 462 -#define _MONITOR_CALL 463 -#define _MONITOR_JUMP_BACKWARD 464 -#define _MONITOR_RESUME 465 +#define _MAYBE_EXPAND_METHOD 452 +#define _MAYBE_EXPAND_METHOD_KW 453 +#define _MONITOR_CALL 454 +#define _MONITOR_JUMP_BACKWARD 455 +#define _MONITOR_RESUME 456 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 466 -#define _POP_JUMP_IF_TRUE 467 +#define _POP_JUMP_IF_FALSE 457 +#define _POP_JUMP_IF_TRUE 458 #define _POP_TOP 
POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 468 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 459 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 469 +#define _PUSH_FRAME 460 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 470 -#define _PY_FRAME_KW 471 -#define _QUICKEN_RESUME 472 -#define _REPLACE_WITH_TRUE 473 +#define _PY_FRAME_GENERAL 461 +#define _PY_FRAME_KW 462 +#define _QUICKEN_RESUME 463 +#define _REPLACE_WITH_TRUE 464 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 474 -#define _SEND 475 -#define _SEND_GEN_FRAME 476 +#define _SAVE_RETURN_OFFSET 465 +#define _SEND 466 +#define _SEND_GEN_FRAME 467 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 477 -#define _STORE_ATTR 478 -#define _STORE_ATTR_INSTANCE_VALUE 479 -#define _STORE_ATTR_SLOT 480 -#define _STORE_ATTR_WITH_HINT 481 +#define _START_EXECUTOR 468 +#define _STORE_ATTR 469 +#define _STORE_ATTR_INSTANCE_VALUE 470 +#define _STORE_ATTR_SLOT 471 +#define _STORE_ATTR_WITH_HINT 472 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 482 -#define _STORE_FAST_0 483 -#define _STORE_FAST_1 484 -#define _STORE_FAST_2 485 -#define _STORE_FAST_3 486 -#define _STORE_FAST_4 487 -#define _STORE_FAST_5 488 -#define _STORE_FAST_6 489 -#define _STORE_FAST_7 490 +#define _STORE_FAST 473 +#define _STORE_FAST_0 474 +#define _STORE_FAST_1 475 +#define _STORE_FAST_2 476 +#define _STORE_FAST_3 477 +#define _STORE_FAST_4 478 +#define _STORE_FAST_5 479 +#define _STORE_FAST_6 480 +#define _STORE_FAST_7 481 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 491 -#define _STORE_SUBSCR 492 +#define _STORE_SLICE 482 +#define _STORE_SUBSCR 483 
#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 493 -#define _TO_BOOL 494 +#define _TIER2_RESUME_CHECK 484 +#define _TO_BOOL 485 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -313,13 +304,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 495 +#define _UNPACK_SEQUENCE 486 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 495 +#define MAX_UOP_ID 486 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index bd6ba7a18f8a2f..3becf27422f705 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -33,15 +33,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_0] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_1] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_2] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_3] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_4] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_5] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_6] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT_7] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_LOAD_FAST_WITH_COUNT] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | 
HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -305,7 +296,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = 8, - [_LOAD_FAST_WITH_COUNT] = 8, [_LOAD_SMALL_INT] = 4, [_STORE_FAST] = 8, [_INIT_CALL_PY_EXACT_ARGS] = 5, @@ -497,15 +487,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", - [_LOAD_FAST_WITH_COUNT] = "_LOAD_FAST_WITH_COUNT", - [_LOAD_FAST_WITH_COUNT_0] = "_LOAD_FAST_WITH_COUNT_0", - [_LOAD_FAST_WITH_COUNT_1] = "_LOAD_FAST_WITH_COUNT_1", - [_LOAD_FAST_WITH_COUNT_2] = "_LOAD_FAST_WITH_COUNT_2", - [_LOAD_FAST_WITH_COUNT_3] = "_LOAD_FAST_WITH_COUNT_3", - [_LOAD_FAST_WITH_COUNT_4] = "_LOAD_FAST_WITH_COUNT_4", - [_LOAD_FAST_WITH_COUNT_5] = "_LOAD_FAST_WITH_COUNT_5", - [_LOAD_FAST_WITH_COUNT_6] = "_LOAD_FAST_WITH_COUNT_6", - [_LOAD_FAST_WITH_COUNT_7] = "_LOAD_FAST_WITH_COUNT_7", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", @@ -626,24 +607,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST: return 0; - case _LOAD_FAST_WITH_COUNT_0: - return 0; - case _LOAD_FAST_WITH_COUNT_1: - return 0; - case _LOAD_FAST_WITH_COUNT_2: - return 0; - case _LOAD_FAST_WITH_COUNT_3: - return 0; - case _LOAD_FAST_WITH_COUNT_4: - return 0; - case _LOAD_FAST_WITH_COUNT_5: - return 0; - case _LOAD_FAST_WITH_COUNT_6: - return 0; - case _LOAD_FAST_WITH_COUNT_7: - return 0; - case _LOAD_FAST_WITH_COUNT: - return 0; case _LOAD_FAST_AND_CLEAR: return 0; case _LOAD_FAST_LOAD_FAST: diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index aa363552b80e9b..ce3d23eaa6d56d 100644 --- 
a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -95,38 +95,37 @@ extern "C" { #define LOAD_FAST_AND_CLEAR 82 #define LOAD_FAST_CHECK 83 #define LOAD_FAST_LOAD_FAST 84 -#define LOAD_FAST_WITH_COUNT 85 -#define LOAD_FROM_DICT_OR_DEREF 86 -#define LOAD_FROM_DICT_OR_GLOBALS 87 -#define LOAD_GLOBAL 88 -#define LOAD_NAME 89 -#define LOAD_SMALL_INT 90 -#define LOAD_SPECIAL 91 -#define LOAD_SUPER_ATTR 92 -#define MAKE_CELL 93 -#define MAP_ADD 94 -#define MATCH_CLASS 95 -#define POP_JUMP_IF_FALSE 96 -#define POP_JUMP_IF_NONE 97 -#define POP_JUMP_IF_NOT_NONE 98 -#define POP_JUMP_IF_TRUE 99 -#define RAISE_VARARGS 100 -#define RERAISE 101 -#define SEND 102 -#define SET_ADD 103 -#define SET_FUNCTION_ATTRIBUTE 104 -#define SET_UPDATE 105 -#define STORE_ATTR 106 -#define STORE_DEREF 107 -#define STORE_FAST 108 -#define STORE_FAST_LOAD_FAST 109 -#define STORE_FAST_STORE_FAST 110 -#define STORE_GLOBAL 111 -#define STORE_NAME 112 -#define SWAP 113 -#define UNPACK_EX 114 -#define UNPACK_SEQUENCE 115 -#define YIELD_VALUE 116 +#define LOAD_FROM_DICT_OR_DEREF 85 +#define LOAD_FROM_DICT_OR_GLOBALS 86 +#define LOAD_GLOBAL 87 +#define LOAD_NAME 88 +#define LOAD_SMALL_INT 89 +#define LOAD_SPECIAL 90 +#define LOAD_SUPER_ATTR 91 +#define MAKE_CELL 92 +#define MAP_ADD 93 +#define MATCH_CLASS 94 +#define POP_JUMP_IF_FALSE 95 +#define POP_JUMP_IF_NONE 96 +#define POP_JUMP_IF_NOT_NONE 97 +#define POP_JUMP_IF_TRUE 98 +#define RAISE_VARARGS 99 +#define RERAISE 100 +#define SEND 101 +#define SET_ADD 102 +#define SET_FUNCTION_ATTRIBUTE 103 +#define SET_UPDATE 104 +#define STORE_ATTR 105 +#define STORE_DEREF 106 +#define STORE_FAST 107 +#define STORE_FAST_LOAD_FAST 108 +#define STORE_FAST_STORE_FAST 109 +#define STORE_GLOBAL 110 +#define STORE_NAME 111 +#define SWAP 112 +#define UNPACK_EX 113 +#define UNPACK_SEQUENCE 114 +#define YIELD_VALUE 115 #define RESUME 149 #define BINARY_OP_ADD_FLOAT 150 #define BINARY_OP_ADD_INT 151 diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index 
43d4ef96ec0344..cda3c340c322f3 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -288,38 +288,37 @@ 'LOAD_FAST_AND_CLEAR': 82, 'LOAD_FAST_CHECK': 83, 'LOAD_FAST_LOAD_FAST': 84, - 'LOAD_FAST_WITH_COUNT': 85, - 'LOAD_FROM_DICT_OR_DEREF': 86, - 'LOAD_FROM_DICT_OR_GLOBALS': 87, - 'LOAD_GLOBAL': 88, - 'LOAD_NAME': 89, - 'LOAD_SMALL_INT': 90, - 'LOAD_SPECIAL': 91, - 'LOAD_SUPER_ATTR': 92, - 'MAKE_CELL': 93, - 'MAP_ADD': 94, - 'MATCH_CLASS': 95, - 'POP_JUMP_IF_FALSE': 96, - 'POP_JUMP_IF_NONE': 97, - 'POP_JUMP_IF_NOT_NONE': 98, - 'POP_JUMP_IF_TRUE': 99, - 'RAISE_VARARGS': 100, - 'RERAISE': 101, - 'SEND': 102, - 'SET_ADD': 103, - 'SET_FUNCTION_ATTRIBUTE': 104, - 'SET_UPDATE': 105, - 'STORE_ATTR': 106, - 'STORE_DEREF': 107, - 'STORE_FAST': 108, - 'STORE_FAST_LOAD_FAST': 109, - 'STORE_FAST_STORE_FAST': 110, - 'STORE_GLOBAL': 111, - 'STORE_NAME': 112, - 'SWAP': 113, - 'UNPACK_EX': 114, - 'UNPACK_SEQUENCE': 115, - 'YIELD_VALUE': 116, + 'LOAD_FROM_DICT_OR_DEREF': 85, + 'LOAD_FROM_DICT_OR_GLOBALS': 86, + 'LOAD_GLOBAL': 87, + 'LOAD_NAME': 88, + 'LOAD_SMALL_INT': 89, + 'LOAD_SPECIAL': 90, + 'LOAD_SUPER_ATTR': 91, + 'MAKE_CELL': 92, + 'MAP_ADD': 93, + 'MATCH_CLASS': 94, + 'POP_JUMP_IF_FALSE': 95, + 'POP_JUMP_IF_NONE': 96, + 'POP_JUMP_IF_NOT_NONE': 97, + 'POP_JUMP_IF_TRUE': 98, + 'RAISE_VARARGS': 99, + 'RERAISE': 100, + 'SEND': 101, + 'SET_ADD': 102, + 'SET_FUNCTION_ATTRIBUTE': 103, + 'SET_UPDATE': 104, + 'STORE_ATTR': 105, + 'STORE_DEREF': 106, + 'STORE_FAST': 107, + 'STORE_FAST_LOAD_FAST': 108, + 'STORE_FAST_STORE_FAST': 109, + 'STORE_GLOBAL': 110, + 'STORE_NAME': 111, + 'SWAP': 112, + 'UNPACK_EX': 113, + 'UNPACK_SEQUENCE': 114, + 'YIELD_VALUE': 115, 'INSTRUMENTED_END_FOR': 237, 'INSTRUMENTED_END_SEND': 238, 'INSTRUMENTED_LOAD_SUPER_ATTR': 239, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ec64ff500c903b..0b67e7b0390787 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -266,12 +266,6 @@ dummy_func( value = PyStackRef_DUP(GETLOCAL(oparg)); 
} - replicate(8) pure inst(LOAD_FAST_WITH_COUNT, (-- value)) { - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - value = PyStackRef_WithCount(local); - } - inst(LOAD_FAST_AND_CLEAR, (-- value)) { value = GETLOCAL(oparg); // do not use SETLOCAL here, it decrefs the old value diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 39d95e9158519d..cc407a2f0efd13 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -197,140 +197,6 @@ break; } - case _LOAD_FAST_WITH_COUNT_0: { - _PyStackRef value; - oparg = 0; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_1: { - _PyStackRef value; - oparg = 1; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_2: { - _PyStackRef value; - oparg = 2; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_3: { - _PyStackRef value; - oparg = 3; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - 
value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_4: { - _PyStackRef value; - oparg = 4; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_5: { - _PyStackRef value; - oparg = 5; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_6: { - _PyStackRef value; - oparg = 6; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT_7: { - _PyStackRef value; - oparg = 7; - assert(oparg == CURRENT_OPARG()); - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - - case _LOAD_FAST_WITH_COUNT: { - _PyStackRef value; - oparg = CURRENT_OPARG(); - _PyStackRef local = GETLOCAL(oparg); - 
assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - case _LOAD_FAST_AND_CLEAR: { _PyStackRef value; oparg = CURRENT_OPARG(); diff --git a/Python/flowgraph.c b/Python/flowgraph.c index b75dbda7b9b413..b1097b64469ecd 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -1871,13 +1871,6 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) continue; } break; - case LOAD_FAST: - /* If the local is consumed immediately, we can use an empheral - * reference count */ - if (nextop == LOAD_ATTR) { - INSTR_SET_OP1(inst, LOAD_FAST_WITH_COUNT, oparg); - } - break; } } diff --git a/Python/gc.c b/Python/gc.c index fd29a48518e71b..b999f6a5927c78 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1438,11 +1438,9 @@ frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_s while (sp > locals) { sp--; _PyStackRef ref = *sp; - if (!PyStackRef_IsNull(ref)) { + if (PyStackRef_IsMortal(ref)) { PyObject *op = PyStackRef_AsPyObjectBorrow(ref); - if (!_Py_IsImmortal(op)) { - move_unvisited(op, stack, visited_space); - } + move_unvisited(op, stack, visited_space); } } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fd6f0bd2f5c6d8..e1aa9530bd66b3 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5993,22 +5993,6 @@ DISPATCH(); } - TARGET(LOAD_FAST_WITH_COUNT) { - frame->instr_ptr = next_instr; - next_instr += 1; - INSTRUCTION_STATS(LOAD_FAST_WITH_COUNT); - _PyStackRef value; - _PyStackRef local = GETLOCAL(oparg); - assert(!PyStackRef_IsNull(local)); - _PyFrame_SetStackPointer(frame, stack_pointer); - value = PyStackRef_WithCount(local); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); 
- DISPATCH(); - } - TARGET(LOAD_FROM_DICT_OR_DEREF) { frame->instr_ptr = next_instr; next_instr += 1; diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 505c93604f95cc..c93941dcac4abf 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -84,7 +84,6 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_FAST_AND_CLEAR, &&TARGET_LOAD_FAST_CHECK, &&TARGET_LOAD_FAST_LOAD_FAST, - &&TARGET_LOAD_FAST_WITH_COUNT, &&TARGET_LOAD_FROM_DICT_OR_DEREF, &&TARGET_LOAD_FROM_DICT_OR_GLOBALS, &&TARGET_LOAD_GLOBAL, @@ -148,6 +147,7 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, + &&_unknown_opcode, &&TARGET_RESUME, &&TARGET_BINARY_OP_ADD_FLOAT, &&TARGET_BINARY_OP_ADD_INT, diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4e1249279f350d..f77a5aa35bdf82 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -47,15 +47,6 @@ break; } - case _LOAD_FAST_WITH_COUNT: { - _Py_UopsSymbol *value; - value = sym_new_not_null(ctx); - stack_pointer[0] = value; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); - break; - } - case _LOAD_FAST_AND_CLEAR: { _Py_UopsSymbol *value; value = GETLOCAL(oparg); From c88bcbceb11db0c334034c4b7bf95596b16a729b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 17:47:24 +0000 Subject: [PATCH 16/20] Avoid INCREF + Steal pairs by using New or return stack ref directly --- Include/internal/pycore_ceval.h | 2 +- Include/internal/pycore_long.h | 2 +- Objects/floatobject.c | 27 ++++++++++++----------- Objects/longobject.c | 29 +++++++++++++++++++++++-- Python/bytecodes.c | 38 ++++++++++++++------------------- Python/executor_cases.c.h | 35 ++++++++++++++---------------- Python/generated_cases.c.h | 35 ++++++++++++++---------------- 7 files changed, 92 insertions(+), 76 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 80bd19a887871c..e33622c1a040e5 100644 --- 
a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -325,7 +325,7 @@ _Py_eval_breaker_bit_is_set(PyThreadState *tstate, uintptr_t bit) void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); -PyAPI_FUNC(PyObject *) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value); +PyAPI_FUNC(_PyStackRef) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value); #ifdef __cplusplus diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 196b4152280a35..3ba98507b2a590 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -109,7 +109,7 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, int64_t); // Export for 'math' shared extension PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t); -PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right); +PyAPI_FUNC(_PyStackRef) _PyLong_Add(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right); diff --git a/Objects/floatobject.c b/Objects/floatobject.c index bcc77287454768..06e2438cad8f0c 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -136,34 +136,37 @@ PyFloat_FromDouble(double fval) #ifdef Py_GIL_DISABLED -PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +_PyStackRef _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) { PyStackRef_CLOSE(left); PyStackRef_CLOSE(right); - return PyFloat_FromDouble(value); + return PyStackRef_FromPyObjectSteal(PyFloat_FromDouble(value)); } #else // Py_GIL_DISABLED -PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +_PyStackRef 
_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) { - PyObject *left_o = PyStackRef_AsPyObjectSteal(left); - PyObject *right_o = PyStackRef_AsPyObjectSteal(right); + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); if (Py_REFCNT(left_o) == 1) { ((PyFloatObject *)left_o)->ob_fval = value; - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); - return left_o; + PyStackRef_CLOSE(right); + return left; } else if (Py_REFCNT(right_o) == 1) { ((PyFloatObject *)right_o)->ob_fval = value; - _Py_DECREF_NO_DEALLOC(left_o); - return right_o; + PyStackRef_CLOSE(left); + return right; } else { PyObject *result = PyFloat_FromDouble(value); - _Py_DECREF_NO_DEALLOC(left_o); - _Py_DECREF_NO_DEALLOC(right_o); - return result; + PyStackRef_CLOSE(left); + PyStackRef_CLOSE(right); + if (result == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectStealMortal(result); } } diff --git a/Objects/longobject.c b/Objects/longobject.c index 4aa35685b509f2..8db51301b63229 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -286,6 +286,27 @@ _PyLong_FromSTwoDigits(stwodigits x) return (PyLongObject*)_PyLong_FromLarge(x); } +/* Create a new int object from a C word-sized int, return a stackref */ +static inline _PyStackRef +_PyLongRef_FromSTwoDigitsRef(stwodigits x) +{ + if (IS_SMALL_INT(x)) { + return PyStackRef_FromPyObjectImmortal(get_small_int((sdigit)x)); + } + assert(x != 0); + PyObject *res; + if (is_medium_int(x)) { + res = _PyLong_FromMedium((sdigit)x); + } + else { + res = _PyLong_FromLarge(x); + } + if (res == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectStealMortal(res); +} + /* If a freshly-allocated int is already shared, it must be a small integer, so negating it must go to PyLong_FromLong */ Py_LOCAL_INLINE(void) @@ -3812,10 +3833,14 @@ long_add(PyLongObject *a, PyLongObject *b) return z; } -PyObject * +_PyStackRef 
_PyLong_Add(PyLongObject *a, PyLongObject *b) { - return (PyObject*)long_add(a, b); + if (_PyLong_BothAreCompact(a, b)) { + stwodigits z = medium_value(a) + medium_value(b); + return _PyLongRef_FromSTwoDigitsRef(z); + } + return PyStackRef_FromPyObjectSteal((PyObject*)long_add(a, b)); } static PyObject * diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0b67e7b0390787..3f0906013de721 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -542,12 +542,11 @@ dummy_func( PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } pure op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { @@ -595,10 +594,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } pure op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { @@ -609,10 +607,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } pure op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { @@ -623,10 +620,9 @@ dummy_func( double dres = 
((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); - ERROR_IF(res_o == NULL, error); - res = PyStackRef_FromPyObjectSteal(res_o); + ERROR_IF(PyStackRef_IsNull(res), error); } macro(BINARY_OP_MULTIPLY_FLOAT) = @@ -807,17 +803,18 @@ dummy_func( PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); DEOPT_IF(res_o == NULL); STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + DEAD(sub_st); + PyStackRef_CLOSE(list_st); + res = PyStackRef_FromPyObjectSteal(res_o); #else DEOPT_IF(index >= PyList_GET_SIZE(list)); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); - Py_INCREF(res_o); + res = PyStackRef_FromPyObjectNew(res_o); + DECREF_INPUTS(); #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); - DEAD(sub_st); - PyStackRef_CLOSE(list_st); - res = PyStackRef_FromPyObjectSteal(res_o); } inst(BINARY_SUBSCR_STR_INT, (unused/1, str_st, sub_st -- res)) { @@ -854,11 +851,8 @@ dummy_func( STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); - Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); - DEAD(sub_st); - PyStackRef_CLOSE(tuple_st); - res = PyStackRef_FromPyObjectSteal(res_o); + res = PyStackRef_FromPyObjectNew(res_o); + DECREF_INPUTS(); } inst(BINARY_SUBSCR_DICT, (unused/1, dict_st, sub_st -- res)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cc407a2f0efd13..9ba99dd5f1c057 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -683,11 +683,10 @@ PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, 
(PyLongObject *)right_o); + res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -766,9 +765,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -787,9 +785,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -808,9 +805,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) JUMP_TO_ERROR(); - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) JUMP_TO_ERROR(); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1027,6 +1023,9 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE(list_st); + res = 
PyStackRef_FromPyObjectSteal(res_o); #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); @@ -1035,11 +1034,10 @@ STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); - Py_INCREF(res_o); - #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(list_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); + #endif stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -1117,10 +1115,9 @@ STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); - Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(tuple_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e1aa9530bd66b3..30e7cab4d56444 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -84,9 +84,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -117,11 +116,10 @@ PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); - PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + res = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); 
PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -250,9 +248,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -321,9 +318,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); - if (res_o == NULL) goto pop_2_error; - res = PyStackRef_FromPyObjectSteal(res_o); + res = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res)) goto pop_2_error; } stack_pointer[-2] = res; stack_pointer += -1; @@ -582,16 +578,18 @@ stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(res_o == NULL, BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + PyStackRef_CLOSE(list_st); + res = PyStackRef_FromPyObjectSteal(res_o); #else DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); - Py_INCREF(res_o); - #endif - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(list_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); + #endif stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -652,10 +650,9 @@ STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); - 
Py_INCREF(res_o); - PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); + res = PyStackRef_FromPyObjectNew(res_o); PyStackRef_CLOSE(tuple_st); - res = PyStackRef_FromPyObjectSteal(res_o); + PyStackRef_CLOSE(sub_st); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); From f5dec96adef1fc25395b0aaea14e6f94d1320181 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 18:16:56 +0000 Subject: [PATCH 17/20] Avoid masking when comparing to True and False --- Include/internal/pycore_stackref.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 209bda6e6be3a6..eadf4a222a15fb 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -219,9 +219,9 @@ static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; /* We should be able to guarantee that the tag bits are set for immortal objects */ -#define PyStackRef_IsTrue(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_TrueStruct)) -#define PyStackRef_IsFalse(ref) (((ref).bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_FalseStruct)) - +#define PyStackRef_IsTrue(REF) ((REF).bits == (((uintptr_t)&_Py_TrueStruct) | Py_TAG_IMMORTAL)) +#define PyStackRef_IsFalse(REF) ((REF).bits == (((uintptr_t)&_Py_FalseStruct) | Py_TAG_IMMORTAL)) +#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) #ifdef Py_DEBUG @@ -244,20 +244,9 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { } } -static inline int -PyStackRef_IsNone(_PyStackRef ref) -{ - if ((ref.bits & (~Py_TAG_BITS)) == ((uintptr_t)&_Py_NoneStruct)) { - assert ((ref.bits & Py_TAG_BITS) == Py_TAG_IMMORTAL); - return 1; - } - return 0; -} - #else #define PyStackRef_CheckValid(REF) ((void)0) -#define PyStackRef_IsNone(REF) ((REF).bits == (((uintptr_t)&_Py_NoneStruct) | Py_TAG_IMMORTAL)) #endif From 
1069d98d1e82adbe814be7d472d04b6a4056ece7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 10 Dec 2024 15:21:50 +0000 Subject: [PATCH 18/20] Revert not-quite-true assert --- Include/internal/pycore_stackref.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index eadf4a222a15fb..dd9aab4a97f93d 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -230,7 +230,6 @@ static inline void PyStackRef_CheckValid(_PyStackRef ref) { PyObject *obj = BITS_TO_PTR_MASKED(ref); switch (tag) { case 0: - assert(!_Py_IsImmortal(obj)); break; case Py_TAG_REFCNT: /* Can be immortal if object was made immortal after reference came into existence */ From d23ae47507a849026c67fd2497a163a93df4cb1b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 12 Dec 2024 16:10:10 +0000 Subject: [PATCH 19/20] Fix up after merge --- Include/internal/pycore_opcode_metadata.h | 4 ++-- Include/internal/pycore_stackref.h | 4 ++-- Include/internal/pycore_uop_metadata.h | 2 +- Include/refcount.h | 2 +- Python/bytecodes.c | 7 +++---- Python/ceval.c | 4 +++- Python/executor_cases.c.h | 13 ++++++------ Python/generated_cases.c.h | 24 ++++++++--------------- 8 files changed, 27 insertions(+), 33 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 208ca8d13d823e..ed696785599da4 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1975,9 +1975,9 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ISINSTANCE] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_BOUND_METHOD] = { 
true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 4339531ea2bd86..282fc635b7e957 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -226,11 +226,11 @@ static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; #ifdef Py_DEBUG static inline void PyStackRef_CheckValid(_PyStackRef ref) { + assert(ref.bits != 0); int tag = ref.bits & Py_TAG_BITS; PyObject *obj = BITS_TO_PTR_MASKED(ref); switch (tag) { case 0: - break; case Py_TAG_REFCNT: /* Can be immortal if object was made immortal after reference came into existence */ assert(obj != NULL && obj != Py_True && obj != Py_False && obj != Py_None); @@ -346,7 +346,7 @@ PyStackRef_DUP(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); if (!PyStackRef_HasCount(ref)) { - Py_INCREF(BITS_TO_PTR(ref)); + Py_INCREF_MORTAL(BITS_TO_PTR(ref)); } return ref; } diff --git 
a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 0c00d51dca6e79..3becf27422f705 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -248,7 +248,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAYBE_EXPAND_METHOD_KW] = HAS_ARG_FLAG, - [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_PY_FRAME_KW] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_FUNCTION_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CHECK_METHOD_VERSION_KW] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_EXPAND_METHOD_KW] = HAS_ARG_FLAG, diff --git a/Include/refcount.h b/Include/refcount.h index 26a8da661525d1..f78343f8f0a559 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -245,7 +245,7 @@ PyAPI_FUNC(void) _Py_DecRef(PyObject *); #ifndef Py_GIL_DISABLED static inline Py_ALWAYS_INLINE void Py_INCREF_MORTAL(PyObject *op) { - assert(!_Py_IsImmortal(op)); + assert(!_Py_IsStaticImmortal(op)); op->ob_refcnt++; _Py_INCREF_STAT_INC(); #if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index f7ec71b7891b0f..3df5ae7324d7b9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4312,7 +4312,7 @@ dummy_func( assert(Py_TYPE(callable_o) == &PyFunction_Type); int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? 
NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -4320,9 +4320,8 @@ dummy_func( // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. SYNC_SP(); - if (new_frame == NULL) { - ERROR_NO_POP(); - } + ERROR_IF(temp == NULL, error); + new_frame = temp; } op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable[1], self_or_null[1], unused[oparg], kwnames -- callable[1], self_or_null[1], unused[oparg], kwnames)) { diff --git a/Python/ceval.c b/Python/ceval.c index 7531bbae293ef4..4e4fae2350ce64 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1500,7 +1500,9 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, { PyCodeObject *co = (PyCodeObject*)func->func_code; const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount; - + for (Py_ssize_t i = 0; i < argcount; i++) { + PyStackRef_CheckValid(args[i]); + } /* Create a dictionary for keyword parameters (**kwags) */ PyObject *kwdict; Py_ssize_t i; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 720cb0f4a18b21..e9f05a603c6559 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5263,7 +5263,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -5271,12 +5271,13 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. 
- stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + if (temp == NULL) JUMP_TO_ERROR(); + new_frame = temp; + stack_pointer[0].bits = (uintptr_t)new_frame; + stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - JUMP_TO_ERROR(); - } break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4c4111b068124f..a59e3198d7efd8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2096,7 +2096,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -2104,12 +2104,10 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } + if (temp == NULL) goto error; + new_frame = temp; } // _SAVE_RETURN_OFFSET { @@ -2126,8 +2124,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -2274,7 +2270,7 @@ int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? 
NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); _PyFrame_SetStackPointer(frame, stack_pointer); - new_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, callable[0], locals, args, positional_args, kwnames_o, frame ); @@ -2282,12 +2278,10 @@ PyStackRef_CLOSE(kwnames); // The frame has stolen all the arguments from the stack, // so there is no need to clean them up. - stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame; - stack_pointer += -2 - oparg; + stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (new_frame == NULL) { - goto error; - } + if (temp == NULL) goto error; + new_frame = temp; } // _SAVE_RETURN_OFFSET { @@ -2304,8 +2298,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); From d230f6868d4b791fb8571ee852ce78699a7d37db Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 13 Dec 2024 11:27:44 +0000 Subject: [PATCH 20/20] Fix compiler errors and warnings --- Include/internal/pycore_frame.h | 1 - Python/ceval.c | 2 +- Python/optimizer_bytecodes.c | 3 +-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 1d35075b7486e8..febd96e5ffa17b 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -158,7 +158,6 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * dest->stackpointer = dest->localsplus + stacktop; for (int i = 0; i < stacktop; i++) { dest->localsplus[i] = PyStackRef_MakeHeapSafe(src->localsplus[i]); - PyStackRef_CheckValid(dest->localsplus[i]); } #ifdef Py_GIL_DISABLED diff --git a/Python/ceval.c b/Python/ceval.c index 
5d565c42062f46..8dc4ab5f3d6919 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1898,9 +1898,9 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, if (kwnames) { total_args += PyTuple_GET_SIZE(kwnames); } + _PyStackRef stack_array[8]; _PyStackRef *arguments; if (total_args <= 8) { - _PyStackRef stack_array[8]; arguments = stack_array; } else { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 42bdbd9ca8d0cd..bff9121926b2c4 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -193,8 +193,7 @@ dummy_func(void) { { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); - PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), - (PyLongObject *)sym_get_const(right)); + PyObject *temp = PyNumber_Add(sym_get_const(left), sym_get_const(right)); if (temp == NULL) { goto error; }