/* NOTE(review): SimpleExtendsException is defined outside this view; this
 * invocation presumably declares UnicodeError with PyExc_ValueError as its
 * base and the given string as its docstring -- confirm against the macro. */
SimpleExtendsException(PyExc_ValueError, UnicodeError,
"Unicode related error.");
+
+/*
+ * Validate an attribute value of a UnicodeError object and hand it back.
+ *
+ * 'attr' is the raw attribute value and may be NULL, which is reported
+ * as an unset attribute. 'as_bytes' must be 0 or 1 (asserted): if 1,
+ * 'attr' must pass PyBytes_Check(); if 0, it must pass PyUnicode_Check().
+ *
+ * The 'name' is the attribute name and is only used for error reporting.
+ *
+ * On success, this returns a new strong reference to 'attr'.
+ * On failure, this sets a TypeError and returns NULL.
+ */
static PyObject *
-get_bytes(PyObject *attr, const char *name)
+as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes)
{
- if (!attr) {
- PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
+ assert(as_bytes == 0 || as_bytes == 1);
+ if (attr == NULL) {
+ PyErr_Format(PyExc_TypeError, "%s attribute not set", name);
return NULL;
}
-
- if (!PyBytes_Check(attr)) {
- PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
+ if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) {
+ PyErr_Format(PyExc_TypeError,
+ "%s attribute must be %s",
+ name,
+ as_bytes ? "bytes" : "unicode");
return NULL;
}
return Py_NewRef(attr);
}
-static PyObject *
-get_unicode(PyObject *attr, const char *name)
-{
- if (!attr) {
- PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
- return NULL;
- }
- if (!PyUnicode_Check(attr)) {
+#define PyUnicodeError_Check(PTR) \
+ PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError)
+#define PyUnicodeError_CAST(PTR) \
+ (assert(PyUnicodeError_Check(PTR)), ((PyUnicodeErrorObject *)(PTR)))
+/*
+ * About the two macros above:
+ *
+ *  - PyUnicodeError_Check(PTR) type-checks PTR against PyExc_UnicodeError
+ *    through PyObject_TypeCheck().
+ *  - PyUnicodeError_CAST(PTR) casts PTR to a PyUnicodeErrorObject pointer
+ *    after asserting PyUnicodeError_Check(PTR); the verification therefore
+ *    only exists where assert() is active. PTR is expanded twice in that
+ *    case, so avoid arguments with side effects.
+ */
+
+/*
+ * Class names to use when reporting errors.
+ *
+ * The public accessors pass these literals as the 'expect_type'
+ * argument of check_unicode_error_type().
+ */
+#define Py_UNICODE_ENCODE_ERROR_NAME "UnicodeEncodeError"
+#define Py_UNICODE_DECODE_ERROR_NAME "UnicodeDecodeError"
+#define Py_UNICODE_TRANSLATE_ERROR_NAME "UnicodeTranslateError"
+
+
+/*
+ * Check that 'self' is a UnicodeError object, i.e. that it passes
+ * PyUnicodeError_Check(). 'self' must not be NULL (asserted).
+ *
+ * On success, this returns 0.
+ * On failure, this sets a TypeError exception and returns -1.
+ *
+ * The 'expect_type' is the name of the expected type, which is
+ * only used for error reporting: the message shows 'expect_type'
+ * next to the actual type of 'self' (through the %T format).
+ *
+ * As an implementation detail, the `PyUnicode*Error_*` functions
+ * currently allow *any* subclass of UnicodeError as 'self'.
+ *
+ * Use one of the `Py_UNICODE_*_ERROR_NAME` macros to avoid typos.
+ */
+static inline int
+check_unicode_error_type(PyObject *self, const char *expect_type)
+{
+ assert(self != NULL);
+ if (!PyUnicodeError_Check(self)) {
PyErr_Format(PyExc_TypeError,
- "%.200s attribute must be unicode", name);
- return NULL;
+ "expecting a %s object, got %T", expect_type, self);
+ return -1;
}
- return Py_NewRef(attr);
+ return 0;
}
-static int
-set_unicodefromstring(PyObject **attr, const char *value)
+
+// --- PyUnicodeErrorObject: internal helpers ---------------------------------
+//
+// In the helpers below, the caller is responsible for ensuring that 'self'
+// is a PyUnicodeErrorObject, although this is verified on DEBUG builds
+// through PyUnicodeError_CAST().
+
+/*
+ * Return the underlying (str) 'encoding' attribute of a UnicodeError object.
+ *
+ * On success, this returns a new strong reference. If the attribute is
+ * unset or is not a str, as_unicode_error_attribute() sets a TypeError
+ * and NULL is returned.
+ */
+static inline PyObject *
+unicode_error_get_encoding_impl(PyObject *self)
+{
+ assert(self != NULL);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ return as_unicode_error_attribute(exc->encoding, "encoding", false);
+}
+
+
+/*
+ * Return the underlying 'object' attribute of a UnicodeError object
+ * as a bytes or a string instance, depending on the 'as_bytes' flag.
+ *
+ * 'as_bytes' is forwarded unchanged to as_unicode_error_attribute():
+ * a nonzero value requires bytes, zero requires str.
+ *
+ * On success, this returns a new strong reference. If the attribute is
+ * unset or has the wrong type, a TypeError is set and NULL is returned.
+ */
+static inline PyObject *
+unicode_error_get_object_impl(PyObject *self, int as_bytes)
{
- PyObject *obj = PyUnicode_FromString(value);
- if (!obj)
+ assert(self != NULL);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ return as_unicode_error_attribute(exc->object, "object", as_bytes);
+}
+
+
+/*
+ * Return the underlying (str) 'reason' attribute of a UnicodeError object.
+ *
+ * On success, this returns a new strong reference. If the attribute is
+ * unset or is not a str, as_unicode_error_attribute() sets a TypeError
+ * and NULL is returned.
+ */
+static inline PyObject *
+unicode_error_get_reason_impl(PyObject *self)
+{
+ assert(self != NULL);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ return as_unicode_error_attribute(exc->reason, "reason", false);
+}
+
+
+/*
+ * Set the underlying (str) 'reason' attribute of a UnicodeError object.
+ *
+ * The C string 'reason' is first converted with PyUnicode_FromString().
+ * If that conversion fails, -1 is returned and the current 'reason'
+ * attribute is left untouched. Otherwise the new str replaces the old
+ * value through Py_XSETREF() and 0 is returned.
+ *
+ * Return 0 on success and -1 on failure.
+ */
+static inline int
+unicode_error_set_reason_impl(PyObject *self, const char *reason)
+{
+ assert(self != NULL);
+ PyObject *value = PyUnicode_FromString(reason);
+ if (value == NULL) {
return -1;
- Py_XSETREF(*attr, obj);
+ }
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ Py_XSETREF(exc->reason, value);
return 0;
}
+
+/*
+ * Set the 'start' attribute of a UnicodeError object.
+ *
+ * The value is stored as given; no clipping or validation is done here.
+ *
+ * This always returns 0: there is no failure path in this helper.
+ * NOTE(review): the int return presumably mirrors the other setters so
+ * that the public wrappers can share one shape -- confirm.
+ */
+static inline int
+unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
+{
+ assert(self != NULL);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ exc->start = start;
+ return 0;
+}
+
+
+/*
+ * Set the 'end' attribute of a UnicodeError object.
+ *
+ * The value is stored as given; no clipping or validation is done here.
+ *
+ * This always returns 0: there is no failure path in this helper.
+ * NOTE(review): the int return presumably mirrors the other setters so
+ * that the public wrappers can share one shape -- confirm.
+ */
+static inline int
+unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
+{
+ assert(self != NULL);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ exc->end = end;
+ return 0;
+}
+
+// --- PyUnicodeErrorObject: internal getters ---------------------------------
+
/*
* Adjust the (inclusive) 'start' value of a UnicodeError object.
*
return start;
}
+
/*
* Adjust the (exclusive) 'end' value of a UnicodeError object.
*
return end;
}
-#define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR))
-#define PyUnicodeError_Check(PTR) \
- PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError)
-#define PyUnicodeError_CAST(PTR) \
- (assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR))
-
-static inline int
-check_unicode_error_type(PyObject *self, const char *expect_type)
+/*
+ * Get various common parameters of a UnicodeError object.
+ *
+ * The caller is responsible for ensuring that 'self' is a
+ * PyUnicodeErrorObject, although this condition is verified by this
+ * function on DEBUG builds.
+ *
+ * Return 0 on success and -1 on failure. On failure, none of the
+ * output parameters is written.
+ *
+ * Output parameters:
+ *
+ * obj A strong reference to the 'object' attribute; the caller
+ * owns it. If 'obj' is NULL, the temporary reference taken
+ * by this function is released before returning.
+ * objlen The 'object' length.
+ * start The clipped 'start' attribute (asserted: 0 <= start <= objlen).
+ * end The clipped 'end' attribute (asserted: 0 <= end <= objlen).
+ *
+ * An output parameter can be NULL to indicate that
+ * the corresponding value does not need to be stored.
+ *
+ * Input parameter:
+ *
+ * as_bytes If 1, the 'object' attribute of the error must be a bytes
+ * object, i.e. the call is for a `UnicodeDecodeError`.
+ * Otherwise, the 'object' attribute must be a string.
+ * Only 0 and 1 are accepted (asserted).
+ *
+ * A TypeError is raised if the 'object' attribute is unset or if its
+ * type is incompatible.
+ */
+int
+_PyUnicodeError_GetParams(PyObject *self,
+ PyObject **obj, Py_ssize_t *objlen,
+ Py_ssize_t *start, Py_ssize_t *end,
+ int as_bytes)
{
- if (!PyUnicodeError_Check(self)) {
- PyErr_Format(PyExc_TypeError,
- "expecting a %s object, got %T", expect_type, self);
+ assert(self != NULL);
+ assert(as_bytes == 0 || as_bytes == 1);
+ PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self);
+ PyObject *r = as_unicode_error_attribute(exc->object, "object", as_bytes);
+ if (r == NULL) {
return -1;
}
+
+ Py_ssize_t n = as_bytes ? PyBytes_GET_SIZE(r) : PyUnicode_GET_LENGTH(r);
+ if (objlen != NULL) {
+ *objlen = n;
+ }
+ if (start != NULL) {
+ *start = unicode_error_adjust_start(exc->start, n);
+ assert(*start >= 0);
+ assert(*start <= n);
+ }
+ if (end != NULL) {
+ *end = unicode_error_adjust_end(exc->end, n);
+ assert(*end >= 0);
+ assert(*end <= n);
+ }
+ if (obj != NULL) {
+ *obj = r;
+ }
+ else {
+ Py_DECREF(r);
+ }
return 0;
}
-static inline PyUnicodeErrorObject *
-as_unicode_error(PyObject *self, const char *expect_type)
-{
- int rc = check_unicode_error_type(self, expect_type);
- return rc < 0 ? NULL : _PyUnicodeError_CAST(self);
-}
+// --- PyUnicodeErrorObject: 'encoding' getters -------------------------------
+/*
+ * Note: PyUnicodeTranslateError does not have an 'encoding' attribute.
+ *
+ * PyUnicodeEncodeError_GetEncoding() returns a new strong reference to
+ * the (str) 'encoding' attribute of 'self'. It returns NULL with a
+ * TypeError set if 'self' is not a UnicodeError object, or if the
+ * attribute is unset or is not a str.
+ */
PyObject *
PyUnicodeEncodeError_GetEncoding(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
- return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding");
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_encoding_impl(self);
}
+/*
+ * Return a new strong reference to the (str) 'encoding' attribute of
+ * 'self'. Return NULL with a TypeError set if 'self' is not a
+ * UnicodeError object, or if the attribute is unset or is not a str.
+ */
PyObject *
PyUnicodeDecodeError_GetEncoding(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
- return exc == NULL ? NULL : get_unicode(exc->encoding, "encoding");
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_encoding_impl(self);
}
+
+// --- PyUnicodeErrorObject: 'object' getters ---------------------------------
+/*
+ * Return a new strong reference to the 'object' attribute of 'self',
+ * which must be a str here. Return NULL with a TypeError set if 'self'
+ * is not a UnicodeError object, or if the attribute is unset or is not
+ * a str.
+ */
PyObject *
PyUnicodeEncodeError_GetObject(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
- return exc == NULL ? NULL : get_unicode(exc->object, "object");
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_object_impl(self, false);
}
+/*
+ * Return a new strong reference to the 'object' attribute of 'self',
+ * which must be a bytes object here. Return NULL with a TypeError set
+ * if 'self' is not a UnicodeError object, or if the attribute is unset
+ * or is not a bytes object.
+ */
PyObject *
PyUnicodeDecodeError_GetObject(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
- return exc == NULL ? NULL : get_bytes(exc->object, "object");
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_object_impl(self, true);
}
+/*
+ * Return a new strong reference to the 'object' attribute of 'self',
+ * which must be a str here. Return NULL with a TypeError set if 'self'
+ * is not a UnicodeError object, or if the attribute is unset or is not
+ * a str.
+ */
PyObject *
PyUnicodeTranslateError_GetObject(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
- return exc == NULL ? NULL : get_unicode(exc->object, "object");
+ int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_object_impl(self, false);
}
+
+// --- PyUnicodeErrorObject: 'start' getters ----------------------------------
+
+/*
+ * Specialization of _PyUnicodeError_GetParams() for the 'start' attribute:
+ * only the 'start' output is requested, every other output is NULL.
+ *
+ * The caller is responsible for ensuring that 'self' is a
+ * PyUnicodeErrorObject, although this condition is verified by this
+ * function on DEBUG builds.
+ *
+ * Return 0 on success and -1 (with a TypeError set) on failure.
+ */
+static inline int
+unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes)
+{
+ assert(self != NULL);
+ return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes);
+}
+/*
+ * Public 'start' getters for UnicodeEncodeError, UnicodeDecodeError and
+ * UnicodeTranslateError (the three functions below).
+ *
+ * Each one checks that 'self' is a UnicodeError object, then stores in
+ * '*start' the 'start' attribute clipped against the length of the
+ * 'object' attribute. The 'object' attribute must be a bytes object for
+ * the Decode variant and a str for the Encode and Translate variants.
+ *
+ * Return 0 on success. Return -1 with a TypeError set if 'self' is not
+ * a UnicodeError object, or if 'object' is unset or has the wrong type.
+ */
+
int
PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
- if (exc == NULL) {
- return -1;
- }
- PyObject *obj = get_unicode(exc->object, "object");
- if (obj == NULL) {
- return -1;
- }
- Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
- Py_DECREF(obj);
- *start = unicode_error_adjust_start(exc->start, size);
- return 0;
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, false);
}
int
PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
- if (exc == NULL) {
- return -1;
- }
- PyObject *obj = get_bytes(exc->object, "object");
- if (obj == NULL) {
- return -1;
- }
- Py_ssize_t size = PyBytes_GET_SIZE(obj);
- Py_DECREF(obj);
- *start = unicode_error_adjust_start(exc->start, size);
- return 0;
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, true);
}
int
PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
- if (exc == NULL) {
- return -1;
- }
- PyObject *obj = get_unicode(exc->object, "object");
- if (obj == NULL) {
- return -1;
- }
- Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
- Py_DECREF(obj);
- *start = unicode_error_adjust_start(exc->start, size);
- return 0;
+ int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, false);
}
-static inline int
-unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
-{
- PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self);
- exc->start = start;
- return 0;
-}
-
+/* --- PyUnicodeErrorObject: 'start' setters ----------------------------------
+ *
+ * Each of the three public setters below checks that 'self' is a
+ * UnicodeError object and then stores 'start' as given (no clipping is
+ * applied when setting).
+ *
+ * Return 0 on success. Return -1 with a TypeError set if 'self' is not
+ * a UnicodeError object.
+ */
int
PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start)
{
- int rc = check_unicode_error_type(self, "UnicodeEncodeError");
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
}
int
PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start)
{
- int rc = check_unicode_error_type(self, "UnicodeDecodeError");
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
}
int
PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start)
{
- int rc = check_unicode_error_type(self, "UnicodeTranslateError");
+ int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME);
return rc < 0 ? -1 : unicode_error_set_start_impl(self, start);
}
+// --- PyUnicodeErrorObject: 'end' getters ------------------------------------
+
+/*
+ * Specialization of _PyUnicodeError_GetParams() for the 'end' attribute:
+ * only the 'end' output is requested, every other output is NULL.
+ *
+ * The caller is responsible for ensuring that 'self' is a
+ * PyUnicodeErrorObject, although this condition is verified by this
+ * function on DEBUG builds.
+ *
+ * Return 0 on success and -1 (with a TypeError set) on failure.
+ */
+static inline int
+unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes)
+{
+ assert(self != NULL);
+ return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes);
+}
+/*
+ * Public 'end' getters for UnicodeEncodeError, UnicodeDecodeError and
+ * UnicodeTranslateError (the three functions below).
+ *
+ * Each one checks that 'self' is a UnicodeError object, then stores in
+ * '*end' the 'end' attribute clipped against the length of the 'object'
+ * attribute. The 'object' attribute must be a bytes object for the
+ * Decode variant and a str for the Encode and Translate variants.
+ *
+ * Return 0 on success. Return -1 with a TypeError set if 'self' is not
+ * a UnicodeError object, or if 'object' is unset or has the wrong type.
+ */
+
int
PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
- if (exc == NULL) {
- return -1;
- }
- PyObject *obj = get_unicode(exc->object, "object");
- if (obj == NULL) {
- return -1;
- }
- Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
- Py_DECREF(obj);
- *end = unicode_error_adjust_end(exc->end, size);
- return 0;
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, false);
}
int
PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
- if (exc == NULL) {
- return -1;
- }
- PyObject *obj = get_bytes(exc->object, "object");
- if (obj == NULL) {
- return -1;
- }
- Py_ssize_t size = PyBytes_GET_SIZE(obj);
- Py_DECREF(obj);
- *end = unicode_error_adjust_end(exc->end, size);
- return 0;
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, true);
}
int
PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
- if (exc == NULL) {
- return -1;
- }
- PyObject *obj = get_unicode(exc->object, "object");
- if (obj == NULL) {
- return -1;
- }
- Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
- Py_DECREF(obj);
- *end = unicode_error_adjust_end(exc->end, size);
- return 0;
+ int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, false);
}
-static inline int
-unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
-{
- PyUnicodeErrorObject *exc = _PyUnicodeError_CAST(self);
- exc->end = end;
- return 0;
-}
-
+/* --- PyUnicodeErrorObject: 'end' setters ------------------------------------
+ *
+ * Each of the three public setters below checks that 'self' is a
+ * UnicodeError object and then stores 'end' as given (no clipping is
+ * applied when setting).
+ *
+ * Return 0 on success. Return -1 with a TypeError set if 'self' is not
+ * a UnicodeError object.
+ */
int
PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end)
{
- int rc = check_unicode_error_type(self, "UnicodeEncodeError");
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
}
int
PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end)
{
- int rc = check_unicode_error_type(self, "UnicodeDecodeError");
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
}
int
PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end)
{
- int rc = check_unicode_error_type(self, "UnicodeTranslateError");
+ int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME);
return rc < 0 ? -1 : unicode_error_set_end_impl(self, end);
}
+// --- PyUnicodeErrorObject: 'reason' getters ---------------------------------
+/*
+ * Each of the three public getters below checks that 'self' is a
+ * UnicodeError object and then returns a new strong reference to its
+ * (str) 'reason' attribute.
+ *
+ * Return NULL with a TypeError set if 'self' is not a UnicodeError
+ * object, or if the attribute is unset or is not a str.
+ */
PyObject *
PyUnicodeEncodeError_GetReason(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
- return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_reason_impl(self);
}
PyObject *
PyUnicodeDecodeError_GetReason(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
- return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_reason_impl(self);
}
PyObject *
PyUnicodeTranslateError_GetReason(PyObject *self)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
- return exc == NULL ? NULL : get_unicode(exc->reason, "reason");
+ int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME);
+ return rc < 0 ? NULL : unicode_error_get_reason_impl(self);
}
+// --- PyUnicodeErrorObject: 'reason' setters ---------------------------------
+/*
+ * Each of the three public setters below checks that 'self' is a
+ * UnicodeError object and then replaces its 'reason' attribute with a
+ * str built from the C string 'reason' by PyUnicode_FromString().
+ *
+ * Return 0 on success. Return -1 if 'self' is not a UnicodeError object
+ * (TypeError set) or if the str could not be created; in both cases
+ * the current 'reason' attribute is left untouched.
+ */
int
PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeEncodeError");
- return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
+ int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason);
}
int
PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeDecodeError");
- return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
+ int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason);
}
int
PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason)
{
- PyUnicodeErrorObject *exc = as_unicode_error(self, "UnicodeTranslateError");
- return exc == NULL ? -1 : set_unicodefromstring(&exc->reason, reason);
+ int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME);
+ return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason);
}