[3.12] gh-123378: fix a crash in `UnicodeError.__str__` (GH-124935) (#125098)

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Tue, 8 Oct 2024 11:56:18 +0000 (13:56 +0200)

committer GitHub <noreply@github.com>

Tue, 8 Oct 2024 11:56:18 +0000 (11:56 +0000)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 8 Oct 2024 11:56:18 +0000 (13:56 +0200)
committer GitHub <noreply@github.com>
Tue, 8 Oct 2024 11:56:18 +0000 (11:56 +0000)
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py

index 5b0334f34652d2128389d29ebf785fb7a350c003..c5f4b892efb50f39876ddc8243fe089cf5dc946a 100644 (file)
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -8,6 +8,7 @@ import pickle
  import weakref
  import errno
  from codecs import BOM_UTF8
+from itertools import product
  from textwrap import dedent
  
  from test.support import (captured_stderr, check_impl_detail,
@@ -1333,6 +1334,29 @@ class ExceptionTests(unittest.TestCase):
          for klass in klasses:
              self.assertEqual(str(klass.__new__(klass)), "")
  
+    def test_unicode_error_str_does_not_crash(self):
+        # Test that str(UnicodeError(...)) does not crash.
+        # See https://github.com/python/cpython/issues/123378.
+
+        for start, end, objlen in product(
+            range(-5, 5),
+            range(-5, 5),
+            range(7),
+        ):
+            obj = 'a' * objlen
+            with self.subTest('encode', objlen=objlen, start=start, end=end):
+                exc = UnicodeEncodeError('utf-8', obj, start, end, '')
+                self.assertIsInstance(str(exc), str)
+
+            with self.subTest('translate', objlen=objlen, start=start, end=end):
+                exc = UnicodeTranslateError(obj, start, end, '')
+                self.assertIsInstance(str(exc), str)
+
+            encoded = obj.encode()
+            with self.subTest('decode', objlen=objlen, start=start, end=end):
+                exc = UnicodeDecodeError('utf-8', encoded, start, end, '')
+                self.assertIsInstance(str(exc), str)
+
      @no_tracing
      def test_badisinstance(self):
          # Bug #2542: if issubclass(e, MyException) raises an exception,
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst

new file mode 100644 (file)

index 0000000..5cd3453
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst
@@ -0,0 +1,3 @@
+Fix a crash in the :meth:`~object.__str__` method of :exc:`UnicodeError`
+objects when the :attr:`UnicodeError.start` and :attr:`UnicodeError.end`
+values are invalid or out-of-range. Patch by Bénédikt Tran.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c

index 4f2153b19358d2aa34e0107bde27ff0da0cb9bf5..c579563db75275d8813fdc7e698299953f79313e 100644 (file)
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2961,46 +2961,55 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
  static PyObject *
  UnicodeEncodeError_str(PyObject *self)
  {
-    PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
      PyObject *result = NULL;
      PyObject *reason_str = NULL;
      PyObject *encoding_str = NULL;
  
-    if (!uself->object)
+    if (exc->object == NULL) {
          /* Not properly initialized. */
          return PyUnicode_FromString("");
+    }
  
      /* Get reason and encoding as strings, which they might not be if
         they've been modified after we were constructed. */
-    reason_str = PyObject_Str(uself->reason);
-    if (reason_str == NULL)
+    reason_str = PyObject_Str(exc->reason);
+    if (reason_str == NULL) {
          goto done;
-    encoding_str = PyObject_Str(uself->encoding);
-    if (encoding_str == NULL)
+    }
+    encoding_str = PyObject_Str(exc->encoding);
+    if (encoding_str == NULL) {
          goto done;
+    }
+
+    Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
+    Py_ssize_t start = exc->start, end = exc->end;
  
-    if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
-        Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
+    if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
+        Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
          const char *fmt;
-        if (badchar <= 0xff)
+        if (badchar <= 0xff) {
              fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
-        else if (badchar <= 0xffff)
+        }
+        else if (badchar <= 0xffff) {
              fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
-        else
+        }
+        else {
              fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
+        }
          result = PyUnicode_FromFormat(
              fmt,
              encoding_str,
              (int)badchar,
-            uself->start,
+            start,
              reason_str);
      }
      else {
          result = PyUnicode_FromFormat(
              "'%U' codec can't encode characters in position %zd-%zd: %U",
              encoding_str,
-            uself->start,
-            uself->end-1,
+            start,
+            end - 1,
              reason_str);
      }
  done:
@@ -3074,41 +3083,46 @@ error:
  static PyObject *
  UnicodeDecodeError_str(PyObject *self)
  {
-    PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
      PyObject *result = NULL;
      PyObject *reason_str = NULL;
      PyObject *encoding_str = NULL;
  
-    if (!uself->object)
+    if (exc->object == NULL) {
          /* Not properly initialized. */
          return PyUnicode_FromString("");
+    }
  
      /* Get reason and encoding as strings, which they might not be if
         they've been modified after we were constructed. */
-    reason_str = PyObject_Str(uself->reason);
-    if (reason_str == NULL)
+    reason_str = PyObject_Str(exc->reason);
+    if (reason_str == NULL) {
          goto done;
-    encoding_str = PyObject_Str(uself->encoding);
-    if (encoding_str == NULL)
+    }
+    encoding_str = PyObject_Str(exc->encoding);
+    if (encoding_str == NULL) {
          goto done;
+    }
+
+    Py_ssize_t len = PyBytes_GET_SIZE(exc->object);
+    Py_ssize_t start = exc->start, end = exc->end;
  
-    if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) {
-        int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
+    if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
+        int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff);
          result = PyUnicode_FromFormat(
              "'%U' codec can't decode byte 0x%02x in position %zd: %U",
              encoding_str,
-            byte,
-            uself->start,
+            badbyte,
+            start,
              reason_str);
      }
      else {
          result = PyUnicode_FromFormat(
              "'%U' codec can't decode bytes in position %zd-%zd: %U",
              encoding_str,
-            uself->start,
-            uself->end-1,
-            reason_str
-            );
+            start,
+            end - 1,
+            reason_str);
      }
  done:
      Py_XDECREF(reason_str);
@@ -3171,42 +3185,49 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
  static PyObject *
  UnicodeTranslateError_str(PyObject *self)
  {
-    PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self;
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
      PyObject *result = NULL;
      PyObject *reason_str = NULL;
  
-    if (!uself->object)
+    if (exc->object == NULL) {
          /* Not properly initialized. */
          return PyUnicode_FromString("");
+    }
  
      /* Get reason as a string, which it might not be if it's been
         modified after we were constructed. */
-    reason_str = PyObject_Str(uself->reason);
-    if (reason_str == NULL)
+    reason_str = PyObject_Str(exc->reason);
+    if (reason_str == NULL) {
          goto done;
+    }
+
+    Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object);
+    Py_ssize_t start = exc->start, end = exc->end;
  
-    if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
-        Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
+    if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) {
+        Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start);
          const char *fmt;
-        if (badchar <= 0xff)
+        if (badchar <= 0xff) {
              fmt = "can't translate character '\\x%02x' in position %zd: %U";
-        else if (badchar <= 0xffff)
+        }
+        else if (badchar <= 0xffff) {
              fmt = "can't translate character '\\u%04x' in position %zd: %U";
-        else
+        }
+        else {
              fmt = "can't translate character '\\U%08x' in position %zd: %U";
+        }
          result = PyUnicode_FromFormat(
              fmt,
              (int)badchar,
-            uself->start,
-            reason_str
-        );
-    } else {
+            start,
+            reason_str);
+    }
+    else {
          result = PyUnicode_FromFormat(
              "can't translate characters in position %zd-%zd: %U",
-            uself->start,
-            uself->end-1,
-            reason_str
-            );
+            start,
+            end - 1,
+            reason_str);
      }
  done:
      Py_XDECREF(reason_str);
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Tue, 8 Oct 2024 11:56:18 +0000 (13:56 +0200)
committer	GitHub <noreply@github.com>
	Tue, 8 Oct 2024 11:56:18 +0000 (11:56 +0000)
Lib/test/test_exceptions.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst	[new file with mode: 0644]	patch \| blob
Objects/exceptions.c		patch \| blob \| blame \| history