diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py index 00ea1c39dd6fb6..027dbecc6134df 100644 --- a/Lib/test/test_codecencodings_iso2022.py +++ b/Lib/test/test_codecencodings_iso2022.py @@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase): (b'ab\x1BNdef', 'replace', 'abdef'), ) +class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_3' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'), + ('ab\u4FF1def', 'replace', b'ab?def'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(O\x29\x28\x1B(B&real;\x1B$(O\x29\x32\x1B(B = &lang;&#4660;&rang;' + ) + +class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_2004' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', 
b'\x1B$(Q\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'), + ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(Q\x29\x28\x1B(B&real;\x1B$(Q\x29\x32\x1B(B = &lang;&#4660;&rang;' + ) + class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase): encoding = 'iso2022_kr' tstring = multibytecodec_support.load_teststring('iso2022_kr') diff --git a/Lib/test/test_tools/test_makeunicodedata.py b/Lib/test/test_tools/test_makeunicodedata.py index eee68676416c45..f31375117e2e92 100644 --- a/Lib/test/test_tools/test_makeunicodedata.py +++ b/Lib/test/test_tools/test_makeunicodedata.py @@ -1,5 +1,5 @@ import unittest -from test.test_tools import toolsdir, imports_under_tool +from test.test_tools import skip_if_missing, imports_under_tool from test import support from test.support.hypothesis_helper import hypothesis @@ -8,6 +8,7 @@ example = hypothesis.example +skip_if_missing("unicode") with imports_under_tool("unicode"): from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst new file mode 100644 index 00000000000000..268a3d310f2b49 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst @@ -0,0 +1 @@ +Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-03-19-25-38.gh-issue-111772.aRQvOn.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-03-19-25-38.gh-issue-111772.aRQvOn.rst new file mode 100644 index 00000000000000..79ae5ab4c8ff3b --- /dev/null +++ b/Misc/NEWS.d/next/Core and 
Builtins/2023-11-03-19-25-38.gh-issue-111772.aRQvOn.rst @@ -0,0 +1 @@ +Specialize slot loads and stores for _Py_T_OBJECT as well as Py_T_OBJECT_EX diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 86bb73b982a551..e8835ad0909633 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -207,8 +207,9 @@ ENCODER(iso2022) encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { + Py_UCS4 buf[2] = {c, 0}; Py_ssize_t length = 1; - encoded = dsg->encoder(codec, &c, &length); + encoded = dsg->encoder(codec, buf, &length); if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ @@ -217,9 +218,11 @@ ENCODER(iso2022) return MBERR_TOOFEW; length = -1; } - else + else { + buf[1] = INCHAR2; length = 2; - encoded = dsg->encoder(codec, &c, &length); + } + encoded = dsg->encoder(codec, buf, &length); if (encoded != MAP_UNMAPPABLE) { insize = length; break; diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 3b72651a1c0f74..3a10f622ccc6c7 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -808,13 +808,10 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno, void *Py_UNUSED(ignore while (start_stack > best_stack) { if (top_of_stack(start_stack) == Except) { /* Pop exception stack as well as the evaluation stack */ - PyThreadState *tstate = _PyThreadState_GET(); - _PyErr_StackItem *exc_info = tstate->exc_info; - PyObject *value = exc_info->exc_value; PyObject *exc = _PyFrame_StackPop(f->f_frame); assert(PyExceptionInstance_Check(exc) || exc == Py_None); - exc_info->exc_value = exc; - Py_XDECREF(value); + PyThreadState *tstate = _PyThreadState_GET(); + Py_XSETREF(tstate->exc_info->exc_value, exc); } else { PyObject *v = _PyFrame_StackPop(f->f_frame); diff --git a/Python/optimizer.c b/Python/optimizer.c index 0e5b4370ccb946..a332fd1c89582c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ 
-384,34 +384,12 @@ PyTypeObject _PyUOpExecutor_Type = { .tp_methods = executor_methods, }; -static int -move_stubs( - _PyUOpInstruction *trace, - int trace_length, - int stubs_start, - int stubs_end -) -{ - memmove(trace + trace_length, - trace + stubs_start, - (stubs_end - stubs_start) * sizeof(_PyUOpInstruction)); - // Patch up the jump targets - for (int i = 0; i < trace_length; i++) { - if (trace[i].opcode == _POP_JUMP_IF_FALSE || - trace[i].opcode == _POP_JUMP_IF_TRUE) - { - int target = trace[i].oparg; - if (target >= stubs_start) { - target += trace_length - stubs_start; - trace[i].oparg = target; - } - } - } - return trace_length + stubs_end - stubs_start; -} - #define TRACE_STACK_SIZE 5 +/* Returns 1 on success, + * 0 if it failed to produce a worthwhile trace, + * and -1 on an error. + */ static int translate_bytecode_to_trace( PyCodeObject *code, @@ -790,7 +768,7 @@ translate_bytecode_to_trace( } assert(code == initial_code); // Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE - if (trace_length > 3) { + if (trace_length > 4) { ADD_TO_TRACE(_EXIT_TRACE, 0, 0); DPRINTF(1, "Created a trace for %s (%s:%d) at byte offset %d -- length %d+%d\n", @@ -800,25 +778,8 @@ translate_bytecode_to_trace( 2 * INSTR_IP(initial_instr, code), trace_length, buffer_size - max_length); - if (max_length < buffer_size) { - // There are stubs - if (trace_length < max_length) { - // There's a gap before the stubs - // Move the stubs back to be immediately after the main trace - // (which ends at trace_length) - DPRINTF(2, - "Moving %d stub uops back by %d\n", - buffer_size - max_length, - max_length - trace_length); - trace_length = move_stubs(trace, trace_length, max_length, buffer_size); - } - else { - assert(trace_length == max_length); - // There's no gap - trace_length = buffer_size; - } - } - return trace_length; + OPT_HIST(trace_length + buffer_size - max_length, trace_length_hist); + return 1; } else { OPT_STAT_INC(trace_too_short); @@ -838,70 +799,84 @@ 
translate_bytecode_to_trace( #undef DPRINTF } +#define UNSET_BIT(array, bit) (array[(bit)>>5] &= ~(1<<((bit)&31))) +#define SET_BIT(array, bit) (array[(bit)>>5] |= (1<<((bit)&31))) +#define BIT_IS_SET(array, bit) (array[(bit)>>5] & (1<<((bit)&31))) + +/* Count the number of used uops, and mark them in the bit vector `used`. + * This can be done in a single pass using simple reachability analysis, + * as there are no backward jumps. + * NOPs are excluded from the count. +*/ static int -remove_unneeded_uops(_PyUOpInstruction *trace, int trace_length) +compute_used(_PyUOpInstruction *buffer, uint32_t *used) { - // Stage 1: Replace unneeded _SET_IP uops with NOP. - // Note that we don't enter stubs, those SET_IPs are needed. - int last_set_ip = -1; - int last_instr = 0; - bool need_ip = true; - for (int pc = 0; pc < trace_length; pc++) { - int opcode = trace[pc].opcode; - if (opcode == _SET_IP) { - if (!need_ip && last_set_ip >= 0) { - trace[last_set_ip].opcode = NOP; - } - need_ip = false; - last_set_ip = pc; + int count = 0; + SET_BIT(used, 0); + for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) { + if (!BIT_IS_SET(used, i)) { + continue; } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { - last_instr = pc + 1; - break; + count++; + int opcode = buffer[i].opcode; + if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + continue; } - else { - // If opcode has ERROR or DEOPT, set need_ip to true - if (_PyOpcode_opcode_metadata[opcode].flags & (HAS_ERROR_FLAG | HAS_DEOPT_FLAG) || opcode == _PUSH_FRAME) { - need_ip = true; - } + /* All other micro-ops fall through, so i+1 is reachable */ + SET_BIT(used, i+1); + switch(opcode) { + case NOP: + /* Don't count NOPs as used */ + count--; + UNSET_BIT(used, i); + break; + case _POP_JUMP_IF_FALSE: + case _POP_JUMP_IF_TRUE: + /* Mark target as reachable */ + SET_BIT(used, buffer[i].oparg); } } - // Stage 2: Squash NOP opcodes (pre-existing or set above). 
- int dest = 0; - for (int pc = 0; pc < last_instr; pc++) { - int opcode = trace[pc].opcode; - if (opcode != NOP) { - if (pc != dest) { - trace[dest] = trace[pc]; - } - dest++; - } + return count; +} + +/* Makes an executor from a buffer of uops. + * Account for the buffer having gaps and NOPs by computing a "used" + * bit vector and only copying the used uops. Here "used" means reachable + * and not a NOP. + */ +static _PyExecutorObject * +make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) +{ + uint32_t used[(_Py_UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; + int length = compute_used(buffer, used); + _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &_PyUOpExecutor_Type, length); + if (executor == NULL) { + return NULL; } - // Stage 3: Move the stubs back. - if (dest < last_instr) { - int new_trace_length = move_stubs(trace, dest, last_instr, trace_length); -#ifdef Py_DEBUG - char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); - int lltrace = 0; - if (python_lltrace != NULL && *python_lltrace >= '0') { - lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that + int dest = length - 1; + /* Scan backwards, so that we see the destinations of jumps before the jumps themselves. 
*/ + for (int i = _Py_UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { + if (!BIT_IS_SET(used, i)) { + continue; } - if (lltrace >= 2) { - printf("Optimized trace (length %d+%d = %d, saved %d):\n", - dest, trace_length - last_instr, new_trace_length, - trace_length - new_trace_length); - for (int pc = 0; pc < new_trace_length; pc++) { - printf("%4d: (%s, %d, %" PRIu64 ")\n", - pc, - uop_name(trace[pc].opcode), - (trace[pc].oparg), - (uint64_t)(trace[pc].operand)); - } + executor->trace[dest] = buffer[i]; + int opcode = buffer[i].opcode; + if (opcode == _POP_JUMP_IF_FALSE || + opcode == _POP_JUMP_IF_TRUE) + { + /* The oparg of the target will already have been set to its new offset */ + int oparg = executor->trace[dest].oparg; + executor->trace[dest].oparg = buffer[oparg].oparg; } -#endif - trace_length = new_trace_length; + /* Set the oparg to be the destination offset, + * so that we can set the oparg of earlier jumps correctly. */ + buffer[i].oparg = dest; + dest--; } - return trace_length; + assert(dest == -1); + executor->base.execute = _PyUopExecute; + _Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); + return (_PyExecutorObject *)executor; } static int @@ -914,28 +889,26 @@ uop_optimize( { _PyBloomFilter dependencies; _Py_BloomFilter_Init(&dependencies); - _PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH]; - int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH, &dependencies); - if (trace_length <= 0) { + _PyUOpInstruction buffer[_Py_UOP_MAX_TRACE_LENGTH]; + int err = translate_bytecode_to_trace(code, instr, buffer, _Py_UOP_MAX_TRACE_LENGTH, &dependencies); + if (err <= 0) { // Error or nothing translated - return trace_length; + return err; } - OPT_HIST(trace_length, trace_length_hist); OPT_STAT_INC(traces_created); char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); - if (uop_optimize != NULL && *uop_optimize > '0') { - trace_length = _Py_uop_analyze_and_optimize(code, trace, trace_length, curr_stackentries); + if 
(uop_optimize == NULL || *uop_optimize > '0') { + err = _Py_uop_analyze_and_optimize(code, buffer, _Py_UOP_MAX_TRACE_LENGTH, curr_stackentries); + if (err < 0) { + return -1; + } } - trace_length = remove_unneeded_uops(trace, trace_length); - _PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &_PyUOpExecutor_Type, trace_length); + _PyExecutorObject *executor = make_executor_from_uops(buffer, &dependencies); if (executor == NULL) { return -1; } - OPT_HIST(trace_length, optimized_trace_length_hist); - executor->base.execute = _PyUopExecute; - memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); - _Py_ExecutorInit((_PyExecutorObject *)executor, &dependencies); - *exec_ptr = (_PyExecutorObject *)executor; + OPT_HIST(Py_SIZE(executor), optimized_trace_length_hist); + *exec_ptr = executor; return 1; } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2d177f14ff268b..61bda80ebe7ba5 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -13,13 +13,42 @@ #include "pycore_optimizer.h" +static void +remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) +{ + // Note that we don't enter stubs, those SET_IPs are needed. 
+ int last_set_ip = -1; + bool need_ip = true; + for (int pc = 0; pc < buffer_size; pc++) { + int opcode = buffer[pc].opcode; + if (opcode == _SET_IP) { + if (!need_ip && last_set_ip >= 0) { + buffer[last_set_ip].opcode = NOP; + } + need_ip = false; + last_set_ip = pc; + } + else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + break; + } + else { + // If opcode has ERROR or DEOPT, set need_ip to true + if (_PyOpcode_opcode_metadata[opcode].flags & (HAS_ERROR_FLAG | HAS_DEOPT_FLAG) || opcode == _PUSH_FRAME) { + need_ip = true; + } + } + } +} + + int _Py_uop_analyze_and_optimize( PyCodeObject *co, - _PyUOpInstruction *trace, - int trace_len, + _PyUOpInstruction *buffer, + int buffer_size, int curr_stacklen ) { - return trace_len; + remove_unneeded_uops(buffer, buffer_size); + return 0; } diff --git a/Python/specialize.c b/Python/specialize.c index 41e74c67d8c9a7..ba704cbbb464d7 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -742,7 +742,7 @@ analyze_descriptor(PyTypeObject *type, PyObject *name, PyObject **descr, int sto if (desc_cls == &PyMemberDescr_Type) { PyMemberDescrObject *member = (PyMemberDescrObject *)descriptor; struct PyMemberDef *dmem = member->d_member; - if (dmem->type == Py_T_OBJECT_EX) { + if (dmem->type == Py_T_OBJECT_EX || dmem->type == _Py_T_OBJECT) { return OBJECT_SLOT; } return OTHER_SLOT; @@ -942,7 +942,7 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - assert(dmem->type == Py_T_OBJECT_EX); + assert(dmem->type == Py_T_OBJECT_EX || dmem->type == _Py_T_OBJECT); assert(offset > 0); cache->index = (uint16_t)offset; write_u32(cache->version, type->tp_version_tag); @@ -1082,7 +1082,7 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - assert(dmem->type == Py_T_OBJECT_EX); + assert(dmem->type == Py_T_OBJECT_EX || dmem->type == 
_Py_T_OBJECT); assert(offset > 0); cache->index = (uint16_t)offset; write_u32(cache->version, type->tp_version_tag);