From 00bb424f22b7a108a8c9a56f6fe845dbaf0397ad Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 6 Nov 2024 12:55:17 -0800 Subject: [PATCH] Include stack effects of uops when computing maximum stack depth UOPs may want to pass values on the stack. This is currently only guaranteed to be possible if the net stack effect of the instruction is greater than the number of values that need to be passed. To ensure there is sufficient space on the stack for values passed between uops, we: 1. Compute the set of stack effects for each instruction by computing the stack effect after each uop in the instruction. 2. Collect the set of stack effects for all instructions in a family. 3. Generate a function that computes the maximum stack effect for each instruction. The maximum stack effect for a generic instruction is the maximum of all instructions in the family. 4. Use the maximum stack effect when computing the maximum stack depth of a function. --- Include/internal/pycore_opcode_metadata.h | 925 ++++++++++++++++++ Lib/test/test_generated_cases.py | 161 ++- Python/flowgraph.c | 69 +- .../opcode_metadata_generator.py | 94 +- Tools/cases_generator/stack.py | 48 +- 5 files changed, 1262 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 58e583eabbcc468..a0fb6bf9514c5d6 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -949,6 +949,931 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { #endif +extern int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect); +#ifdef NEED_OPCODE_METADATA +int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { + switch(opcode) { + case BINARY_OP: { + *effect = 0; + return 0; + } + case BINARY_OP_ADD_FLOAT: { + *effect = 0; + return 0; + } + case BINARY_OP_ADD_INT: { + *effect = 0; + return 0; + } + case BINARY_OP_ADD_UNICODE: { + *effect = 0; + return 0; + } + case BINARY_OP_INPLACE_ADD_UNICODE: { + *effect = 0; + return 0; + } + case BINARY_OP_MULTIPLY_FLOAT: { + *effect = 0; + return 0; + } + case BINARY_OP_MULTIPLY_INT: { + *effect = 0; + return 0; + } + case BINARY_OP_SUBTRACT_FLOAT: { + *effect = 0; + return 0; + } + case BINARY_OP_SUBTRACT_INT: { + *effect = 0; + return 0; + } + case BINARY_SLICE: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR_DICT: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR_GETITEM: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR_LIST_INT: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR_STR_INT: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR_TUPLE_INT: { + *effect = 0; + return 0; + } + case BUILD_LIST: { + *effect = (-oparg) + (1); + return 0; + } + case BUILD_MAP: { + *effect = (-oparg*2) + (1); + return 0; + } + case BUILD_SET: { + *effect = (-oparg) + (1); + return 0; + } + case BUILD_SLICE: { + *effect = (-2 - ((oparg == 3) ? 1 : 0)) + (1); + return 0; + } + case BUILD_STRING: { + *effect = (-oparg) + (1); + return 0; + } + case BUILD_TUPLE: { + *effect = (-oparg) + (1); + return 0; + } + case CACHE: { + *effect = 0; + return 0; + } + case CALL: { + int max_eff = Py_MAX((-2 - oparg) + (0), (-2 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-2 - oparg) + (2 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_ALLOC_AND_ENTER_INIT: { + int max_eff = Py_MAX((-2 - oparg) + (0), (-2 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-2 - oparg) + (2 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_BOUND_METHOD_EXACT_ARGS: { + int max_eff = Py_MAX((-2 - oparg) + (0), (-2 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-2 - oparg) + (2 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_BOUND_METHOD_GENERAL: { + int max_eff = Py_MAX((-2 - oparg) + (0), (-2 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-2 - oparg) + (2 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_BUILTIN_CLASS: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_BUILTIN_FAST: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_BUILTIN_FAST_WITH_KEYWORDS: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_BUILTIN_O: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_FUNCTION_EX: { + *effect = Py_MAX((-3 - (oparg & 1)) + (1), (-3 - (oparg & 1)) + (3 + (oparg & 1))); + return 0; + } + case CALL_INTRINSIC_1: { + *effect = 0; + return 0; + } + case CALL_INTRINSIC_2: { + *effect = 0; + return 0; + } + case CALL_ISINSTANCE: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_KW: { + int max_eff = Py_MAX((-3 - oparg) + (0), (-3 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-3 - oparg) + (3 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_KW_BOUND_METHOD: { + int max_eff = Py_MAX((-3 - oparg) + (0), (-3 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-3 - oparg) + (3 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_KW_NON_PY: { + *effect = Py_MAX((-3 - oparg) + (1), (-3 - oparg) + (3 + oparg)); + return 0; + } + case CALL_KW_PY: { + int max_eff = Py_MAX((-3 - oparg) + (0), (-3 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-3 - oparg) + (3 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_LEN: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_LIST_APPEND: { + *effect = 0; + return 0; + } + case CALL_METHOD_DESCRIPTOR_FAST: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_METHOD_DESCRIPTOR_NOARGS: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_METHOD_DESCRIPTOR_O: { + *effect = (-2 - oparg) + (1); + return 0; + } + case CALL_NON_PY_GENERAL: { + *effect = Py_MAX((-2 - oparg) + (1), (-2 - oparg) + (2 + oparg)); + return 0; + } + case CALL_PY_EXACT_ARGS: { + int max_eff = Py_MAX((-2 - oparg) + (0), (-2 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-2 - oparg) + (2 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_PY_GENERAL: { + int max_eff = Py_MAX((-2 - oparg) + (0), (-2 - oparg) + (1)); + max_eff = Py_MAX(max_eff, (-2 - oparg) + (2 + oparg)); + *effect = max_eff; + return 0; + } + case CALL_STR_1: { + *effect = 0; + return 0; + } + case CALL_TUPLE_1: { + *effect = 0; + return 0; + } + case CALL_TYPE_1: { + *effect = 0; + return 0; + } + case CHECK_EG_MATCH: { + *effect = 0; + return 0; + } + case CHECK_EXC_MATCH: { + *effect = 0; + return 0; + } + case CLEANUP_THROW: { + *effect = 0; + return 0; + } + case COMPARE_OP: { + *effect = 0; + return 0; + } + case COMPARE_OP_FLOAT: { + *effect = 0; + return 0; + } + case COMPARE_OP_INT: { + *effect = 0; + return 0; + } + case COMPARE_OP_STR: { + *effect = 0; + return 0; + } + case CONTAINS_OP: { + *effect = 0; + return 0; + } + case CONTAINS_OP_DICT: { + *effect = 0; + return 0; + } + case CONTAINS_OP_SET: { + *effect = 0; + return 0; + } + case CONVERT_VALUE: { + *effect = 0; + return 0; + } + case COPY: { + *effect = (-1 - (oparg-1)) + (2 + (oparg-1)); + return 0; + } + case COPY_FREE_VARS: { + *effect = 0; + return 0; + } + case DELETE_ATTR: { + *effect = 0; + return 0; + } + case DELETE_DEREF: { + *effect = 0; + return 0; + } + case DELETE_FAST: { + *effect = 0; + return 0; + } + case DELETE_GLOBAL: { + *effect = 0; + return 0; + } + case DELETE_NAME: { + *effect = 0; + return 0; + } + case DELETE_SUBSCR: { + *effect = 0; + return 0; + } + case DICT_MERGE: { + *effect = (-5 - (oparg - 1)) + (4 + (oparg - 1)); + return 0; + } + case DICT_UPDATE: { + *effect = (-2 - (oparg - 1)) + (1 + (oparg - 1)); + return 0; + } + case END_ASYNC_FOR: { + *effect = 0; + return 0; + } + case END_FOR: { + *effect = 0; + return 0; + } + case END_SEND: { + *effect = 0; + return 0; + } + case ENTER_EXECUTOR: { + *effect = 0; + return 0; + } + case EXIT_INIT_CHECK: { + *effect = 0; + return 0; + } + case EXTENDED_ARG: { + *effect = 0; + return 0; + } + case FORMAT_SIMPLE: { + *effect = 0; + return 0; + } + case FORMAT_WITH_SPEC: { + *effect = 0; + return 0; + } + case FOR_ITER: { + *effect = 1; + return 0; + } + case FOR_ITER_GEN: { + *effect = 1; + return 0; + } + case FOR_ITER_LIST: { + *effect = 1; + return 0; + } + case FOR_ITER_RANGE: { + *effect = 1; + return 0; + } + case FOR_ITER_TUPLE: { + *effect = 1; + return 0; + } + case GET_AITER: { + *effect = 0; + return 0; + } + case GET_ANEXT: { + *effect = 1; + return 0; + } + case GET_AWAITABLE: { + *effect = 0; + return 0; + } + case GET_ITER: { + *effect = 0; + return 0; + } + case GET_LEN: { + *effect = 1; + return 0; + } + case GET_YIELD_FROM_ITER: { + *effect = 0; + return 0; + } + case IMPORT_FROM: { + *effect = 1; + return 0; + } + case IMPORT_NAME: { + *effect = 0; + return 0; + } + case INSTRUMENTED_CALL: { + *effect = Py_MAX((-2 - oparg) + (1), (-2 - oparg) + (2 + oparg)); + return 0; + } + case INSTRUMENTED_CALL_FUNCTION_EX: { + *effect = 0; + return 0; + } + case INSTRUMENTED_CALL_KW: { + *effect = 0; + return 0; + } + case INSTRUMENTED_END_FOR: { + *effect = 0; + return 0; + } + case INSTRUMENTED_END_SEND: { + *effect = 0; + return 0; + } + case INSTRUMENTED_FOR_ITER: { + *effect = 0; + return 0; + } + case INSTRUMENTED_INSTRUCTION: { + *effect = 0; + return 0; + } + case INSTRUMENTED_JUMP_BACKWARD: { + *effect = 0; + return 0; + } + case INSTRUMENTED_JUMP_FORWARD: { + *effect = 0; + return 0; + } + case INSTRUMENTED_LINE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_LOAD_SUPER_ATTR: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_FALSE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_NONE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_TRUE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_RESUME: { + *effect = 0; + return 0; + } + case INSTRUMENTED_RETURN_VALUE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_YIELD_VALUE: { + *effect = 0; + return 0; + } + case INTERPRETER_EXIT: { + *effect = 0; + return 0; + } + case IS_OP: { + *effect = 0; + return 0; + } + case JUMP: { + *effect = 0; + return 0; + } + case JUMP_BACKWARD: { + *effect = 0; + return 0; + } + case JUMP_BACKWARD_NO_INTERRUPT: { + *effect = 0; + return 0; + } + case JUMP_FORWARD: { + *effect = 0; + return 0; + } + case JUMP_IF_FALSE: { + *effect = 0; + return 0; + } + case JUMP_IF_TRUE: { + *effect = 0; + return 0; + } + case JUMP_NO_INTERRUPT: { + *effect = 0; + return 0; + } + case LIST_APPEND: { + *effect = (-2 - (oparg-1)) + (1 + (oparg-1)); + return 0; + } + case LIST_EXTEND: { + *effect = (-2 - (oparg-1)) + (1 + (oparg-1)); + return 0; + } + case LOAD_ATTR: { + *effect = Py_MAX(1, (-1) + (1 + (oparg & 1))); + return 0; + } + case LOAD_ATTR_CLASS: { + *effect = (-1) + (1 + (oparg & 1)); + return 0; + } + case LOAD_ATTR_CLASS_WITH_METACLASS_CHECK: { + *effect = (-1) + (1 + (oparg & 1)); + return 0; + } + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { + *effect = 0; + return 0; + } + case LOAD_ATTR_INSTANCE_VALUE: { + *effect = (-1) + (1 + (oparg & 1)); + return 0; + } + case LOAD_ATTR_METHOD_LAZY_DICT: { + *effect = 1; + return 0; + } + case LOAD_ATTR_METHOD_NO_DICT: { + *effect = 1; + return 0; + } + case LOAD_ATTR_METHOD_WITH_VALUES: { + *effect = 1; + return 0; + } + case LOAD_ATTR_MODULE: { + *effect = (-1) + (1 + (oparg & 1)); + return 0; + } + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + *effect = 0; + return 0; + } + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + *effect = 0; + return 0; + } + case LOAD_ATTR_PROPERTY: { + *effect = 0; + return 0; + } + case LOAD_ATTR_SLOT: { + *effect = (-1) + (1 + (oparg & 1)); + return 0; + } + case LOAD_ATTR_WITH_HINT: { + *effect = (-1) + (1 + (oparg & 1)); + return 0; + } + case LOAD_BUILD_CLASS: { + *effect = 1; + return 0; + } + case LOAD_CLOSURE: { + *effect = 1; + return 0; + } + case LOAD_COMMON_CONSTANT: { + *effect = 1; + return 0; + } + case LOAD_CONST: { + *effect = 1; + return 0; + } + case LOAD_CONST_IMMORTAL: { + *effect = 1; + return 0; + } + case LOAD_DEREF: { + *effect = 1; + return 0; + } + case LOAD_FAST: { + *effect = 1; + return 0; + } + case LOAD_FAST_AND_CLEAR: { + *effect = 1; + return 0; + } + case LOAD_FAST_CHECK: { + *effect = 1; + return 0; + } + case LOAD_FAST_LOAD_FAST: { + *effect = 2; + return 0; + } + case LOAD_FROM_DICT_OR_DEREF: { + *effect = 0; + return 0; + } + case LOAD_FROM_DICT_OR_GLOBALS: { + *effect = 0; + return 0; + } + case LOAD_GLOBAL: { + *effect = Py_MAX(1, (0) + (1 + (oparg & 1))); + return 0; + } + case LOAD_GLOBAL_BUILTIN: { + *effect = Py_MAX(1, (0) + (1 + (oparg & 1))); + return 0; + } + case LOAD_GLOBAL_MODULE: { + *effect = Py_MAX(1, (0) + (1 + (oparg & 1))); + return 0; + } + case LOAD_LOCALS: { + *effect = 1; + return 0; + } + case LOAD_NAME: { + *effect = 1; + return 0; + } + case LOAD_SMALL_INT: { + *effect = 1; + return 0; + } + case LOAD_SPECIAL: { + *effect = 1; + return 0; + } + case LOAD_SUPER_ATTR: { + *effect = (-3) + (1 + (oparg & 1)); + return 0; + } + case LOAD_SUPER_ATTR_ATTR: { + *effect = 0; + return 0; + } + case LOAD_SUPER_ATTR_METHOD: { + *effect = 0; + return 0; + } + case MAKE_CELL: { + *effect = 0; + return 0; + } + case MAKE_FUNCTION: { + *effect = 0; + return 0; + } + case MAP_ADD: { + *effect = (-3 - (oparg - 1)) + (1 + (oparg - 1)); + return 0; + } + case MATCH_CLASS: { + *effect = 0; + return 0; + } + case MATCH_KEYS: { + *effect = 1; + return 0; + } + case MATCH_MAPPING: { + *effect = 1; + return 0; + } + case MATCH_SEQUENCE: { + *effect = 1; + return 0; + } + case NOP: { + *effect = 0; + return 0; + } + case POP_BLOCK: { + *effect = 0; + return 0; + } + case POP_EXCEPT: { + *effect = 0; + return 0; + } + case POP_JUMP_IF_FALSE: { + *effect = 0; + return 0; + } + case POP_JUMP_IF_NONE: { + *effect = 0; + return 0; + } + case POP_JUMP_IF_NOT_NONE: { + *effect = 0; + return 0; + } + case POP_JUMP_IF_TRUE: { + *effect = 0; + return 0; + } + case POP_TOP: { + *effect = 0; + return 0; + } + case PUSH_EXC_INFO: { + *effect = 1; + return 0; + } + case PUSH_NULL: { + *effect = 1; + return 0; + } + case RAISE_VARARGS: { + *effect = (-oparg) + (0); + return 0; + } + case RERAISE: { + *effect = (-1 - oparg) + (oparg); + return 0; + } + case RESERVED: { + *effect = 0; + return 0; + } + case RESUME: { + *effect = 0; + return 0; + } + case RESUME_CHECK: { + *effect = 0; + return 0; + } + case RETURN_GENERATOR: { + *effect = 1; + return 0; + } + case RETURN_VALUE: { + *effect = 0; + return 0; + } + case SEND: { + *effect = 0; + return 0; + } + case SEND_GEN: { + *effect = 0; + return 0; + } + case SETUP_ANNOTATIONS: { + *effect = 0; + return 0; + } + case SETUP_CLEANUP: { + *effect = 2; + return 0; + } + case SETUP_FINALLY: { + *effect = 1; + return 0; + } + case SETUP_WITH: { + *effect = 1; + return 0; + } + case SET_ADD: { + *effect = (-2 - (oparg-1)) + (1 + (oparg-1)); + return 0; + } + case SET_FUNCTION_ATTRIBUTE: { + *effect = 0; + return 0; + } + case SET_UPDATE: { + *effect = (-2 - (oparg-1)) + (1 + (oparg-1)); + return 0; + } + case STORE_ATTR: { + *effect = 0; + return 0; + } + case STORE_ATTR_INSTANCE_VALUE: { + *effect = 0; + return 0; + } + case STORE_ATTR_SLOT: { + *effect = 0; + return 0; + } + case STORE_ATTR_WITH_HINT: { + *effect = 0; + return 0; + } + case STORE_DEREF: { + *effect = 0; + return 0; + } + case STORE_FAST: { + *effect = 0; + return 0; + } + case STORE_FAST_LOAD_FAST: { + *effect = 0; + return 0; + } + case STORE_FAST_MAYBE_NULL: { + *effect = 0; + return 0; + } + case STORE_FAST_STORE_FAST: { + *effect = 0; + return 0; + } + case STORE_GLOBAL: { + *effect = 0; + return 0; + } + case STORE_NAME: { + *effect = 0; + return 0; + } + case STORE_SLICE: { + *effect = 0; + return 0; + } + case STORE_SUBSCR: { + *effect = 0; + return 0; + } + case STORE_SUBSCR_DICT: { + *effect = 0; + return 0; + } + case STORE_SUBSCR_LIST_INT: { + *effect = 0; + return 0; + } + case SWAP: { + *effect = (-2 - (oparg-2)) + (2 + (oparg-2)); + return 0; + } + case TO_BOOL: { + *effect = 0; + return 0; + } + case TO_BOOL_ALWAYS_TRUE: { + *effect = 0; + return 0; + } + case TO_BOOL_BOOL: { + *effect = 0; + return 0; + } + case TO_BOOL_INT: { + *effect = 0; + return 0; + } + case TO_BOOL_LIST: { + *effect = 0; + return 0; + } + case TO_BOOL_NONE: { + *effect = 0; + return 0; + } + case TO_BOOL_STR: { + *effect = 0; + return 0; + } + case UNARY_INVERT: { + *effect = 0; + return 0; + } + case UNARY_NEGATIVE: { + *effect = 0; + return 0; + } + case UNARY_NOT: { + *effect = 0; + return 0; + } + case UNPACK_EX: { + *effect = (-1) + (1 + (oparg & 0xFF) + (oparg >> 8)); + return 0; + } + case UNPACK_SEQUENCE: { + *effect = Py_MAX(1, (-1) + (oparg)); + return 0; + } + case UNPACK_SEQUENCE_LIST: { + *effect = (-1) + (oparg); + return 0; + } + case UNPACK_SEQUENCE_TUPLE: { + *effect = (-1) + (oparg); + return 0; + } + case UNPACK_SEQUENCE_TWO_TUPLE: { + *effect = 1; + return 0; + } + case WITH_EXCEPT_START: { + *effect = 1; + return 0; + } + case YIELD_VALUE: { + *effect = 0; + return 0; + } + default: + return -1; + } +} + +#endif + enum InstructionFormat { INSTR_FMT_IB = 1, INSTR_FMT_IBC = 2, diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index ff9a52b7adac8a5..b9a5905e8c0cf8d 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1,9 +1,12 @@ import contextlib import os +import re import sys import tempfile +import textwrap import unittest +from io import StringIO from test import support from test import test_tools @@ -29,10 +32,12 @@ def skip_if_different_mount_drives(): test_tools.skip_if_missing("cases_generator") with test_tools.imports_under_tool("cases_generator"): - from analyzer import StackItem + from analyzer import analyze_forest, StackItem + from cwriter import CWriter import parser from stack import Local, Stack import tier1_generator + import opcode_metadata_generator import optimizer_generator @@ -43,6 +48,14 @@ def handle_stderr(): return support.captured_stderr() +def parse_src(src): + p = parser.Parser(src, "test.c") + nodes = [] + while node := p.definition(): + nodes.append(node) + return nodes + + class TestEffects(unittest.TestCase): def test_effect_sizes(self): stack = Stack() @@ -65,6 +78,152 @@ def test_effect_sizes(self): self.assertEqual(stack.top_offset.to_c(), "1 - oparg - oparg*2 + oparg*4") +class TestGenerateMaxStackEffect(unittest.TestCase): + def check(self, input, output): + analysis = analyze_forest(parse_src(input)) + buf = StringIO() + writer = CWriter(buf, 0, False) + opcode_metadata_generator.generate_max_stack_effect_function(analysis, writer) + buf.seek(0) + generated = buf.read() + matches = re.search(r"(case OP: {[^}]+})", generated) + if matches is None: + self.fail(f"Couldn't find case statement for OP in:\n {generated}") + self.assertEqual(output.strip(), matches.group(1)) + + def test_push_one(self): + input = """ + inst(OP, (a -- b, c)) { + SPAM(); + } + """ + output = """ + case OP: { + *effect = 1; + return 0; + } + """ + self.check(input, output) + + def test_cond_push(self): + input = """ + inst(OP, (a -- b, c if (oparg))) { + SPAM(); + } + """ + output = """ + case OP: { + *effect = (-1) + (1 + ((oparg) ? 1 : 0)); + return 0; + } + """ + self.check(input, output) + + def test_ops_pass_two(self): + input = """ + op(A, (-- val1)) { + val1 = SPAM(); + } + op(B, (-- val2)) { + val2 = SPAM(); + } + op(C, (val1, val2 --)) { + } + macro(OP) = A + B + C; + """ + output = """ + case OP: { + *effect = 2; + return 0; + } + """ + self.check(input, output) + + def test_ops_pass_two_cond_push(self): + input = """ + op(A, (-- val1, val2)) { + val1 = 0; + val2 = 1; + } + op(B, (val1, val2 -- val1, val2, val3 if (oparg))) { + val3 = SPAM(); + } + macro(OP) = A + B; + """ + output = """ + case OP: { + *effect = Py_MAX(2, (0) + (2 + ((oparg) ? 1 : 0))); + return 0; + } + """ + self.check(input, output) + + def test_push_array(self): + input = """ + inst(OP, (values[oparg] -- values[oparg], above)) { + SPAM(values, oparg); + above = 0; + } + """ + output = """ + case OP: { + *effect = (-oparg) + (1 + oparg); + return 0; + } + """ + self.check(input, output) + + def test_family(self): + input = """ + op(A, (-- val1, val2)) { + val1 = 0; + val2 = 1; + } + op(B, (val1, val2 -- val3)) { + val3 = 2; + } + macro(OP1) = A + B; + + inst(OP, (-- val)) { + val = 0; + } + + family(OP, 0) = { OP1 }; + """ + output = """ + case OP: { + *effect = 2; + return 0; + } + """ + self.check(input, output) + + def test_family_intermediate_array(self): + input = """ + op(A, (-- values[oparg])) { + val1 = 0; + val2 = 1; + } + op(B, (values[oparg] -- val3)) { + val3 = 2; + } + macro(OP1) = A + B; + + inst(OP, (-- val)) { + val = 0; + } + + family(OP, 0) = { OP1 }; + """ + output = """ + case OP: { + *effect = Py_MAX(1, (0) + (oparg)); + return 0; + } + """ + self.check(input, output) + + class TestGeneratedCases(unittest.TestCase): def setUp(self) -> None: super().setUp() diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 5418131950076d6..ad81875baa2af7e 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -733,7 +733,7 @@ make_cfg_traversal_stack(basicblock *entryblock) { return stack; } -/* Return the stack effect of opcode with argument oparg. +/* Compute the stack effects of opcode with argument oparg. Some opcodes have different stack effect when jump to the target and when not jump. The 'jump' parameter specifies the case: @@ -742,25 +742,41 @@ make_cfg_traversal_stack(basicblock *entryblock) { * 1 -- when jump * -1 -- maximal */ +typedef struct { + /* The stack effect of the instruction after it has finished execution */ + int net; + + /* The maximum stack effect of the instruction. Some instructions may + * temporarily push extra values to the stack while they are executing. + */ + int max; +} stack_effects; + Py_LOCAL(int) -stack_effect(int opcode, int oparg, int jump) +get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects) { if (opcode < 0) { - return PY_INVALID_STACK_EFFECT; + return -1; } if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) { // Specialized instructions are not supported. - return PY_INVALID_STACK_EFFECT; + return -1; } int popped = _PyOpcode_num_popped(opcode, oparg); int pushed = _PyOpcode_num_pushed(opcode, oparg); if (popped < 0 || pushed < 0) { - return PY_INVALID_STACK_EFFECT; + return -1; } if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) { + effects->net = 0; + effects->max = 0; return 0; } - return pushed - popped; + if (_PyOpcode_max_stack_effect(opcode, oparg, &effects->max) < 0) { + return -1; + } + effects->net = pushed - popped; + return 0; } Py_LOCAL_INLINE(int) @@ -807,35 +823,36 @@ calculate_stackdepth(cfg_builder *g) basicblock *next = b->b_next; for (int i = 0; i < b->b_iused; i++) { cfg_instr *instr = &b->b_instr[i]; - int effect = stack_effect(instr->i_opcode, instr->i_oparg, 0); - if (effect == PY_INVALID_STACK_EFFECT) { + stack_effects effects; + if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) { PyErr_Format(PyExc_SystemError, "Invalid stack effect for opcode=%d, arg=%i", instr->i_opcode, instr->i_oparg); goto error; } - int new_depth = depth + effect; + int new_depth = depth + effects.net; if (new_depth < 0) { - PyErr_Format(PyExc_ValueError, - "Invalid CFG, stack underflow"); - goto error; + PyErr_Format(PyExc_ValueError, + "Invalid CFG, stack underflow"); + return -1; } - if (new_depth > maxdepth) { - maxdepth = new_depth; + if (depth + effects.max < 0) { + PyErr_Format(PyExc_ValueError, + "Invalid CFG, stack underflow"); + return -1; } + maxdepth = Py_MAX(maxdepth, depth + effects.max); if (HAS_TARGET(instr->i_opcode)) { - effect = stack_effect(instr->i_opcode, instr->i_oparg, 1); - if (effect == PY_INVALID_STACK_EFFECT) { + if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) { PyErr_Format(PyExc_SystemError, "Invalid stack effect for opcode=%d, arg=%i", instr->i_opcode, instr->i_oparg); goto error; } - int target_depth = depth + effect; + int target_depth = depth + effects.net; assert(target_depth >= 0); /* invalid code or bug in stackdepth() */ - if (target_depth > maxdepth) { - maxdepth = target_depth; - } + assert(depth + effects.max >= 0); + maxdepth = Py_MAX(maxdepth, depth + effects.max); if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) { goto error; } @@ -2936,13 +2953,21 @@ _PyCfg_JumpLabelsToTargets(cfg_builder *g) int PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump) { - return stack_effect(opcode, oparg, jump); + stack_effects effs; + if (get_stack_effects(opcode, oparg, jump, &effs) < 0) { + return PY_INVALID_STACK_EFFECT; + } + return effs.net; } int PyCompile_OpcodeStackEffect(int opcode, int oparg) { - return stack_effect(opcode, oparg, -1); + stack_effects effs; + if (get_stack_effects(opcode, oparg, -1, &effs) < 0) { + return PY_INVALID_STACK_EFFECT; + } + return effs.net; } /* Access to compiler optimizations for unit tests. diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 2ad7604af9cc0d2..225dbd9053401f4 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -19,8 +19,9 @@ cflags, ) from cwriter import CWriter +from dataclasses import dataclass from typing import TextIO -from stack import get_stack_effect +from stack import Stack, get_stack_effect, get_stack_effects # Constants used instead of size for macro expansions. # Note: 1, 2, 4 must match actual cache entry sizes. @@ -107,8 +108,99 @@ def add(inst: Instruction | PseudoInstruction) -> None: emit_stack_effect_function(out, "popped", sorted(popped_data)) emit_stack_effect_function(out, "pushed", sorted(pushed_data)) + generate_max_stack_effect_function(analysis, out) + + +def emit_max_stack_effect_function( + out: CWriter, effects: list[tuple[str, list[str]]] +) -> None: + out.emit("extern int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect);\n") + out.emit("#ifdef NEED_OPCODE_METADATA\n") + out.emit(f"int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) {{\n") + out.emit("switch(opcode) {\n") + for name, exprs in effects: + out.emit(f"case {name}: {{\n") + if len(exprs) == 1: + out.emit(f"*effect = {exprs[0]};\n") + elif len(exprs) == 2: + out.emit(f"*effect = Py_MAX({exprs[0]}, {exprs[1]});\n") + else: + assert len(exprs) > 2 + out.emit(f"int max_eff = Py_MAX({exprs[0]}, {exprs[1]});\n") + for expr in exprs[2:]: + out.emit(f"max_eff = Py_MAX(max_eff, {expr});\n") + out.emit(f"*effect = max_eff;\n") + out.emit(f"return 0;\n") + out.emit("}\n") + out.emit("default:\n") + out.emit(" return -1;\n") + out.emit("}\n") + out.emit("}\n\n") + out.emit("#endif\n\n") + + +@dataclass +class MaxStackEffectSet: + int_effect: int + cond_effects: set[str] + + def __init__(self) -> None: + self.int_effect = 0 + self.cond_effects = set() + + def update(self, other: MaxStackEffectSet) -> None: + self.int_effect = max(self.int_effect, other.int_effect) + self.cond_effects.update(other.cond_effects) + + +def generate_max_stack_effect_function(analysis: Analysis, out: CWriter) -> None: + """Generate a function that returns the maximum stack effect of an + instruction while it is executing. + + Specialized instructions that are composed of uops may have a greater stack + effect during instruction execution than the net stack effect of the + instruction if the uops pass values on the stack. + """ + effects: dict[str, MaxStackEffectSet] = {} + + def add(inst: Instruction | PseudoInstruction) -> None: + inst_effect = MaxStackEffectSet() + for stack in get_stack_effects(inst): + popped = stack.base_offset + pushed = stack.top_offset - stack.base_offset + popped_int, pushed_int = popped.as_int(), pushed.as_int() + if popped_int is not None and pushed_int is not None: + int_effect = popped_int + pushed_int + if int_effect > inst_effect.int_effect: + inst_effect.int_effect = int_effect + else: + inst_effect.cond_effects.add(f"({popped.to_c()}) + ({pushed.to_c()})") + effects[inst.name] = inst_effect + + # Collect unique stack effects for each instruction + for inst in analysis.instructions.values(): + add(inst) + for pseudo in analysis.pseudos.values(): + add(pseudo) + + # Merge the effects of all specializations in a family into the generic + # instruction + for family in analysis.families.values(): + for inst in family.members: + effects[family.name].update(effects[inst.name]) + + data: list[tuple[str, list[str]]] = [] + for name, effects in sorted(effects.items(), key=lambda kv: kv[0]): + exprs = [] + if effects.int_effect or not effects.cond_effects: + exprs.append(str(effects.int_effect)) + exprs.extend(sorted(effects.cond_effects)) + data.append((name, exprs)) + emit_max_stack_effect_function(out, data) + def generate_is_pseudo(analysis: Analysis, out: CWriter) -> None: + """Write the IS_PSEUDO_INSTR macro""" out.emit("\n\n#define IS_PSEUDO_INSTR(OP) ( \\\n") for op in analysis.pseudos: diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index a954bed4df073cf..5397b10e85d79b3 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -1,8 +1,9 @@ import re from analyzer import StackItem, StackEffect, Instruction, Uop, PseudoInstruction +from collections import defaultdict from dataclasses import dataclass from cwriter import CWriter -from typing import Iterator +from typing import Iterator, Tuple UNUSED = {"unused"} @@ -163,7 +164,7 @@ def simplify(self) -> None: self.pushed.sort() self.popped.sort() - def to_c(self) -> str: + def as_parts(self) -> Tuple[int, str]: self.simplify() int_offset = 0 symbol_offset = "" @@ -177,6 +178,10 @@ def to_c(self) -> str: int_offset += int(item) except ValueError: symbol_offset += f" + {maybe_parenthesize(item)}" + return int_offset, symbol_offset + + def to_c(self) -> str: + int_offset, symbol_offset = self.as_parts() if symbol_offset and not int_offset: res = symbol_offset else: @@ -385,18 +390,37 @@ def merge(self, other: "Stack", out: CWriter) -> None: self.align(other, out) +def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]: + if isinstance(inst, Instruction): + for uop in inst.parts: + if isinstance(uop, Uop): + yield uop.stack + else: + assert isinstance(inst, PseudoInstruction) + yield inst.stack + + def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack: stack = Stack() + for s in stacks(inst): + locals: dict[str, Local] = {} + for var in reversed(s.inputs): + _, local = stack.pop(var) + if var.name != "unused": + locals[local.name] = local + for var in s.outputs: + if var.name in locals: + local = locals[var.name] + else: + local = Local.unused(var) + stack.push(local) + return stack - def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]: - if isinstance(inst, Instruction): - for uop in inst.parts: - if isinstance(uop, Uop): - yield uop.stack - else: - assert isinstance(inst, PseudoInstruction) - yield inst.stack +def get_stack_effects(inst: Instruction | PseudoInstruction) -> list[Stack]: + """Returns a list of stack effects after each uop""" + result = [] + stack = Stack() for s in stacks(inst): locals: dict[str, Local] = {} for var in reversed(s.inputs): @@ -409,7 +433,9 @@ def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]: else: local = Local.unused(var) stack.push(local) - return stack + result.append(stack.copy()) + return result + @dataclass class Storage: