From 8bc18182a7c28f86265c9d82bd0338137480921c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:16:22 +0100 Subject: [PATCH] gh-127691: add type checks when using `PyUnicodeError` objects (GH-127694) --- Doc/whatsnew/3.14.rst | 6 + ...-12-06-16-53-34.gh-issue-127691.k_Jitp.rst | 3 + Objects/exceptions.c | 216 ++++++++++++------ 3 files changed, 157 insertions(+), 68 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5ce398ab93d6b4..095949242c09d9 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1045,6 +1045,12 @@ New features * Add :c:func:`PyUnstable_Object_EnableDeferredRefcount` for enabling deferred reference counting, as outlined in :pep:`703`. +* The :ref:`Unicode Exception Objects ` C API + now raises a :exc:`TypeError` if its exception argument is not + a :exc:`UnicodeError` object. + (Contributed by Bénédikt Tran in :gh:`127691`.) + + Porting to Python 3.14 ---------------------- diff --git a/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst new file mode 100644 index 00000000000000..c942ff3d9eda53 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-06-16-53-34.gh-issue-127691.k_Jitp.rst @@ -0,0 +1,3 @@ +The :ref:`Unicode Exception Objects ` C API +now raises a :exc:`TypeError` if its exception argument is not +a :exc:`UnicodeError` object. Patch by Bénédikt Tran. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 287cbc25305964..6880c24196cbb8 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2668,7 +2668,7 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); static PyObject * -get_string(PyObject *attr, const char *name) +get_bytes(PyObject *attr, const char *name) { if (!attr) { PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); @@ -2748,40 +2748,74 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) return end; } +#define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR)) +#define PyUnicodeError_Check(PTR) \ + PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError) +#define PyUnicodeError_CAST(PTR) \ + (assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR)) + + +static inline int +check_unicode_error_type(PyObject *self, const char *expect_type) +{ + if (!PyUnicodeError_Check(self)) { + PyErr_Format(PyExc_TypeError, + "expecting a %s object, got %T", expect_type, self); + return -1; + } + return 0; +} + + +static inline PyUnicodeErrorObject * +as_unicode_error(PyObject *self, const char *expect_type) +{ + int rc = check_unicode_error_type(self, expect_type); + return rc < 0 ? NULL : _PyUnicodeError_CAST(self); +} + PyObject * -PyUnicodeEncodeError_GetEncoding(PyObject *exc) +PyUnicodeEncodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); } PyObject * -PyUnicodeDecodeError_GetEncoding(PyObject *exc) +PyUnicodeDecodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); } PyObject * -PyUnicodeEncodeError_GetObject(PyObject *exc) +PyUnicodeEncodeError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->object, "object"); } PyObject * -PyUnicodeDecodeError_GetObject(PyObject *exc) +PyUnicodeDecodeError_GetObject(PyObject *self) { - return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_bytes(exc->object, "object"); } PyObject * -PyUnicodeTranslateError_GetObject(PyObject *exc) +PyUnicodeTranslateError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? NULL : get_unicode(exc->object, "object"); } int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + if (exc == NULL) { + return -1; + } PyObject *obj = get_unicode(exc->object, "object"); if (obj == NULL) { return -1; @@ -2796,8 +2830,11 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_bytes(exc->object, "object"); if (obj == NULL) { return -1; } @@ -2809,45 +2846,63 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) int -PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - return PyUnicodeEncodeError_GetStart(exc, start); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { + return -1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); + return 0; } static inline int unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) { - ((PyUnicodeErrorObject *)self)->start = start; + PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); + exc->start = start; return 0; } int -PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int -PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int -PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + if (exc == NULL) { + return -1; + } PyObject *obj = get_unicode(exc->object, "object"); if (obj == NULL) { return -1; @@ -2862,8 +2917,11 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_bytes(exc->object, "object"); if (obj == NULL) { return -1; } @@ -2875,108 +2933,130 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) int -PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - return PyUnicodeEncodeError_GetEnd(exc, end); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + if (exc == NULL) { + return -1; + } + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { + return -1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); + Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); + return 0; } static inline int -unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) +unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; + PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); + exc->end = end; return 0; } int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } int -PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } int -PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } + PyObject * -PyUnicodeEncodeError_GetReason(PyObject *exc) +PyUnicodeEncodeError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } PyObject * -PyUnicodeDecodeError_GetReason(PyObject *exc) +PyUnicodeDecodeError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } PyObject * -PyUnicodeTranslateError_GetReason(PyObject *exc) +PyUnicodeTranslateError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); } int -PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } int -PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } int -PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason) +PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); + return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); } static int -UnicodeError_clear(PyUnicodeErrorObject *self) +UnicodeError_clear(PyObject *self) { - Py_CLEAR(self->encoding); - Py_CLEAR(self->object); - Py_CLEAR(self->reason); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + Py_CLEAR(exc->encoding); + Py_CLEAR(exc->object); + Py_CLEAR(exc->reason); return BaseException_clear((PyBaseExceptionObject *)self); } static void -UnicodeError_dealloc(PyUnicodeErrorObject *self) +UnicodeError_dealloc(PyObject *self) { + PyTypeObject *type = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - UnicodeError_clear(self); - Py_TYPE(self)->tp_free((PyObject *)self); + (void)UnicodeError_clear(self); + type->tp_free(self); } static int -UnicodeError_traverse(PyUnicodeErrorObject *self, visitproc visit, void *arg) +UnicodeError_traverse(PyObject *self, visitproc visit, void *arg) { - Py_VISIT(self->encoding); - Py_VISIT(self->object); - Py_VISIT(self->reason); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + Py_VISIT(exc->encoding); + Py_VISIT(exc->object); + Py_VISIT(exc->reason); return BaseException_traverse((PyBaseExceptionObject *)self, visit, arg); } @@ -3015,7 +3095,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; @@ -3027,7 +3107,7 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeEncodeError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; PyObject *encoding_str = NULL; @@ -3135,7 +3215,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) } } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->object, object /* already a strong reference */); exc->start = start; @@ -3147,7 +3227,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeDecodeError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL; PyObject *encoding_str = NULL; @@ -3236,7 +3316,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; exc->end = end; @@ -3248,7 +3328,7 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeTranslateError_str(PyObject *self) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *result = NULL; PyObject *reason_str = NULL;