diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index b36b4681f5dddb..49a6265e5eb02f 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -94,6 +94,12 @@ PyAPI_FUNC(void) _PyErr_ChainExceptions1(PyObject *); /* In exceptions.c */ +PyAPI_FUNC(int) _PyUnicodeError_GetParams( + PyObject *self, + PyObject **obj, Py_ssize_t *objlen, + Py_ssize_t *start, Py_ssize_t *end, + int as_bytes); + PyAPI_FUNC(PyObject*) PyUnstable_Exc_PrepReraiseStar( PyObject *orig, PyObject *excs); diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 6880c24196cbb8..714f8c828afbc1 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2667,47 +2667,167 @@ SimpleExtendsException(PyExc_Exception, ValueError, SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); + +/* + * Check the validity of 'attr' as a unicode or bytes object depending + * on 'as_bytes' and return a new reference on it if it is the case. + * + * The 'name' is the attribute name and is only used for error reporting. + * + * On success, this returns a strong reference on 'attr'. + * On failure, this sets a TypeError and returns NULL. + */ static PyObject * -get_bytes(PyObject *attr, const char *name) +as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) { - if (!attr) { - PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); + assert(as_bytes == 0 || as_bytes == 1); + if (attr == NULL) { + PyErr_Format(PyExc_TypeError, "%s attribute not set", name); return NULL; } - - if (!PyBytes_Check(attr)) { - PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name); + if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { + PyErr_Format(PyExc_TypeError, + "%s attribute must be %s", + name, + as_bytes ? "bytes" : "unicode"); return NULL; } return Py_NewRef(attr); } -static PyObject * -get_unicode(PyObject *attr, const char *name) -{ - if (!attr) { - PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); - return NULL; - } - if (!PyUnicode_Check(attr)) { +#define PyUnicodeError_Check(PTR) \ + PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError) +#define PyUnicodeError_CAST(PTR) \ + (assert(PyUnicodeError_Check(PTR)), ((PyUnicodeErrorObject *)(PTR))) + + +/* class names to use when reporting errors */ +#define Py_UNICODE_ENCODE_ERROR_NAME "UnicodeEncodeError" +#define Py_UNICODE_DECODE_ERROR_NAME "UnicodeDecodeError" +#define Py_UNICODE_TRANSLATE_ERROR_NAME "UnicodeTranslateError" + + +/* + * Check that 'self' is a UnicodeError object. + * + * On success, this returns 0. + * On failure, this sets a TypeError exception and returns -1. + * + * The 'expect_type' is the name of the expected type, which is + * only used for error reporting. + * + * As an implementation detail, the `PyUnicode*Error_*` functions + * currently allow *any* subclass of UnicodeError as 'self'. + * + * Use one of the `Py_UNICODE_*_ERROR_NAME` macros to avoid typos. + */ +static inline int +check_unicode_error_type(PyObject *self, const char *expect_type) +{ + assert(self != NULL); + if (!PyUnicodeError_Check(self)) { PyErr_Format(PyExc_TypeError, - "%.200s attribute must be unicode", name); - return NULL; + "expecting a %s object, got %T", expect_type, self); + return -1; } - return Py_NewRef(attr); + return 0; } -static int -set_unicodefromstring(PyObject **attr, const char *value) + +// --- PyUnicodeEncodeObject: internal helpers -------------------------------- +// +// In the helpers below, the caller is responsible to ensure that 'self' +// is a PyUnicodeErrorObject, although this is verified on DEBUG builds +// through PyUnicodeError_CAST(). + +/* + * Return the underlying (str) 'encoding' attribute of a UnicodeError object. + */ +static inline PyObject * +unicode_error_get_encoding_impl(PyObject *self) +{ + assert(self != NULL); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + return as_unicode_error_attribute(exc->encoding, "encoding", false); +} + + +/* + * Return the underlying 'object' attribute of a UnicodeError object + * as a bytes or a string instance, depending on the 'as_bytes' flag. + */ +static inline PyObject * +unicode_error_get_object_impl(PyObject *self, int as_bytes) { - PyObject *obj = PyUnicode_FromString(value); - if (!obj) + assert(self != NULL); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + return as_unicode_error_attribute(exc->object, "object", as_bytes); +} + + +/* + * Return the underlying (str) 'reason' attribute of a UnicodeError object. + */ +static inline PyObject * +unicode_error_get_reason_impl(PyObject *self) +{ + assert(self != NULL); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + return as_unicode_error_attribute(exc->reason, "reason", false); +} + + +/* + * Set the underlying (str) 'reason' attribute of a UnicodeError object. + * + * Return 0 on success and -1 on failure. + */ +static inline int +unicode_error_set_reason_impl(PyObject *self, const char *reason) +{ + assert(self != NULL); + PyObject *value = PyUnicode_FromString(reason); + if (value == NULL) { return -1; - Py_XSETREF(*attr, obj); + } + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + Py_XSETREF(exc->reason, value); return 0; } + +/* + * Set the 'start' attribute of a UnicodeError object. + * + * Return 0 on success and -1 on failure. + */ +static inline int +unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +{ + assert(self != NULL); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + exc->start = start; + return 0; +} + + +/* + * Set the 'end' attribute of a UnicodeError object. + * + * Return 0 on success and -1 on failure. + */ +static inline int +unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) +{ + assert(self != NULL); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + exc->end = end; + return 0; +} + +// --- PyUnicodeEncodeObject: internal getters -------------------------------- + /* * Adjust the (inclusive) 'start' value of a UnicodeError object. * @@ -2728,6 +2848,7 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) return start; } + /* * Adjust the (exclusive) 'end' value of a UnicodeError object. * @@ -2748,134 +2869,162 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) return end; } -#define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR)) -#define PyUnicodeError_Check(PTR) \ - PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError) -#define PyUnicodeError_CAST(PTR) \ - (assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR)) - -static inline int -check_unicode_error_type(PyObject *self, const char *expect_type) +/* + * Get various common parameters of a UnicodeError object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. + * + * Return 0 on success and -1 on failure. + * + * Output parameters: + * + * obj A strong reference to the 'object' attribute. + * objlen The 'object' length. + * start The clipped 'start' attribute. + * end The clipped 'end' attribute. + * + * An output parameter can be NULL to indicate that + * the corresponding value does not need to be stored. + * + * Input parameter: + * + * as_bytes If 1, the error's 'object' attribute must be a bytes object, + * i.e. the call is for a `UnicodeDecodeError`. Otherwise, the + * 'object' attribute must be a string. + * + * A TypeError is raised if the 'object' type is incompatible. + */ +int +_PyUnicodeError_GetParams(PyObject *self, + PyObject **obj, Py_ssize_t *objlen, + Py_ssize_t *start, Py_ssize_t *end, + int as_bytes) { - if (!PyUnicodeError_Check(self)) { - PyErr_Format(PyExc_TypeError, - "expecting a %s object, got %T", expect_type, self); + assert(self != NULL); + assert(as_bytes == 0 || as_bytes == 1); + PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); + PyObject *r = as_unicode_error_attribute(exc->object, "object", as_bytes); + if (r == NULL) { return -1; } + + Py_ssize_t n = as_bytes ? PyBytes_GET_SIZE(r) : PyUnicode_GET_LENGTH(r); + if (objlen != NULL) { + *objlen = n; + } + if (start != NULL) { + *start = unicode_error_adjust_start(exc->start, n); + assert(*start >= 0); + assert(*start <= n); + } + if (end != NULL) { + *end = unicode_error_adjust_end(exc->end, n); + assert(*end >= 0); + assert(*end <= n); + } + if (obj != NULL) { + *obj = r; + } + else { + Py_DECREF(r); + } return 0; } -static inline PyUnicodeErrorObject * -as_unicode_error(PyObject *self, const char *expect_type) -{ - int rc = check_unicode_error_type(self, expect_type); - return rc < 0 ? NULL : _PyUnicodeError_CAST(self); -} +// --- PyUnicodeEncodeObject: 'encoding' getters ------------------------------ +// Note: PyUnicodeTranslateError does not have an 'encoding' attribute. PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); - return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_encoding_impl(self); } + PyObject * PyUnicodeDecodeError_GetEncoding(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); - return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_encoding_impl(self); } + +// --- PyUnicodeEncodeObject: 'object' getters -------------------------------- + PyObject * PyUnicodeEncodeError_GetObject(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); - return exc == NULL ? NULL : get_unicode(exc->object, "object"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_object_impl(self, false); } + PyObject * PyUnicodeDecodeError_GetObject(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); - return exc == NULL ? NULL : get_bytes(exc->object, "object"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_object_impl(self, true); } + PyObject * PyUnicodeTranslateError_GetObject(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); - return exc == NULL ? NULL : get_unicode(exc->object, "object"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_object_impl(self, false); } + +// --- PyUnicodeEncodeObject: 'start' getters --------------------------------- + +/* + * Specialization of _PyUnicodeError_GetParams() for the 'start' attribute. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. + */ +static inline int +unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) +{ + assert(self != NULL); + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes); +} + + int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); - if (exc == NULL) { - return -1; - } - PyObject *obj = get_unicode(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(obj); - Py_DECREF(obj); - *start = unicode_error_adjust_start(exc->start, size); - return 0; + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, false); } int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); - if (exc == NULL) { - return -1; - } - PyObject *obj = get_bytes(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyBytes_GET_SIZE(obj); - Py_DECREF(obj); - *start = unicode_error_adjust_start(exc->start, size); - return 0; + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, true); } int PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); - if (exc == NULL) { - return -1; - } - PyObject *obj = get_unicode(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(obj); - Py_DECREF(obj); - *start = unicode_error_adjust_start(exc->start, size); - return 0; + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, false); } -static inline int -unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) -{ - PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); - exc->start = start; - return 0; -} - +// --- PyUnicodeEncodeObject: 'start' setters --------------------------------- int PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } @@ -2883,7 +3032,7 @@ PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) int PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } @@ -2891,78 +3040,57 @@ PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) { - int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } +// --- PyUnicodeEncodeObject: 'end' getters ----------------------------------- + +/* + * Specialization of _PyUnicodeError_GetParams() for the 'end' attribute. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. + */ +static inline int +unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes) +{ + assert(self != NULL); + return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes); +} + + int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); - if (exc == NULL) { - return -1; - } - PyObject *obj = get_unicode(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(obj); - Py_DECREF(obj); - *end = unicode_error_adjust_end(exc->end, size); - return 0; + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, false); } int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); - if (exc == NULL) { - return -1; - } - PyObject *obj = get_bytes(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyBytes_GET_SIZE(obj); - Py_DECREF(obj); - *end = unicode_error_adjust_end(exc->end, size); - return 0; + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, true); } int PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); - if (exc == NULL) { - return -1; - } - PyObject *obj = get_unicode(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(obj); - Py_DECREF(obj); - *end = unicode_error_adjust_end(exc->end, size); - return 0; + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, false); } -static inline int -unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) -{ - PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self); - exc->end = end; - return 0; -} - +// --- PyUnicodeEncodeObject: 'end' setters ----------------------------------- int PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } @@ -2970,7 +3098,7 @@ PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) int PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } @@ -2978,56 +3106,60 @@ PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) int PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) { - int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } +// --- PyUnicodeEncodeObject: 'reason' getters -------------------------------- + PyObject * PyUnicodeEncodeError_GetReason(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); - return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_reason_impl(self); } PyObject * PyUnicodeDecodeError_GetReason(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); - return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_reason_impl(self); } PyObject * PyUnicodeTranslateError_GetReason(PyObject *self) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); - return exc == NULL ? NULL : get_unicode(exc->reason, "reason"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); + return rc < 0 ? NULL : unicode_error_get_reason_impl(self); } +// --- PyUnicodeEncodeObject: 'reason' setters -------------------------------- + int PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError"); - return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason); } int PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError"); - return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason); } int PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason) { - PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError"); - return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason); }