]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-102406: replace exception chaining by PEP-678 notes in codecs (#102407)
authorIrit Katriel <1055913+iritkatriel@users.noreply.github.com>
Tue, 21 Mar 2023 21:36:31 +0000 (21:36 +0000)
committerGitHub <noreply@github.com>
Tue, 21 Mar 2023 21:36:31 +0000 (21:36 +0000)
Include/cpython/pyerrors.h
Lib/test/test_codecs.py
Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst [new file with mode: 0644]
Objects/exceptions.c
Python/codecs.c

index d0300f6ee56a25444ead06f07a3676a8f26fdb6b..65bdc942f5067a816c91864f4db6270833db64bf 100644 (file)
@@ -116,24 +116,6 @@ PyAPI_FUNC(int) _PyException_AddNote(
      PyObject *exc,
      PyObject *note);
 
-/* Helper that attempts to replace the current exception with one of the
- * same type but with a prefix added to the exception text. The resulting
- * exception description looks like:
- *
- *     prefix (exc_type: original_exc_str)
- *
- * Only some exceptions can be safely replaced. If the function determines
- * it isn't safe to perform the replacement, it will leave the original
- * unmodified exception in place.
- *
- * Returns a borrowed reference to the new exception (if any), NULL if the
- * existing exception was left in place.
- */
-PyAPI_FUNC(PyObject *) _PyErr_TrySetFromCause(
-    const char *prefix_format,   /* ASCII-encoded string  */
-    ...
-    );
-
 /* In signalmodule.c */
 
 int PySignal_SetWakeupFd(int fd);
index e3add0c1ee926c93665dca69dcac1879bf460e02..376175f90f63eb6e02a93e1d4179cb470d34c01c 100644 (file)
@@ -2819,24 +2819,19 @@ class TransformCodecTest(unittest.TestCase):
                 self.assertIsNone(failure.exception.__cause__)
 
     @unittest.skipUnless(zlib, "Requires zlib support")
-    def test_custom_zlib_error_is_wrapped(self):
+    def test_custom_zlib_error_is_noted(self):
         # Check zlib codec gives a good error for malformed input
-        msg = "^decoding with 'zlib_codec' codec failed"
-        with self.assertRaisesRegex(Exception, msg) as failure:
+        msg = "decoding with 'zlib_codec' codec failed"
+        with self.assertRaises(Exception) as failure:
             codecs.decode(b"hello", "zlib_codec")
-        self.assertIsInstance(failure.exception.__cause__,
-                                                type(failure.exception))
+        self.assertEqual(msg, failure.exception.__notes__[0])
 
-    def test_custom_hex_error_is_wrapped(self):
+    def test_custom_hex_error_is_noted(self):
         # Check hex codec gives a good error for malformed input
-        msg = "^decoding with 'hex_codec' codec failed"
-        with self.assertRaisesRegex(Exception, msg) as failure:
+        msg = "decoding with 'hex_codec' codec failed"
+        with self.assertRaises(Exception) as failure:
             codecs.decode(b"hello", "hex_codec")
-        self.assertIsInstance(failure.exception.__cause__,
-                                                type(failure.exception))
-
-    # Unfortunately, the bz2 module throws OSError, which the codec
-    # machinery currently can't wrap :(
+        self.assertEqual(msg, failure.exception.__notes__[0])
 
     # Ensure codec aliases from http://bugs.python.org/issue7475 work
     def test_aliases(self):
@@ -2860,11 +2855,8 @@ class TransformCodecTest(unittest.TestCase):
         self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")
 
 
-# The codec system tries to wrap exceptions in order to ensure the error
-# mentions the operation being performed and the codec involved. We
-# currently *only* want this to happen for relatively stateless
-# exceptions, where the only significant information they contain is their
-# type and a single str argument.
+# The codec system tries to add notes to exceptions in order to ensure
+# the error mentions the operation being performed and the codec involved.
 
 # Use a local codec registry to avoid appearing to leak objects when
 # registering multiple search functions
@@ -2874,10 +2866,10 @@ def _get_test_codec(codec_name):
     return _TEST_CODECS.get(codec_name)
 
 
-class ExceptionChainingTest(unittest.TestCase):
+class ExceptionNotesTest(unittest.TestCase):
 
     def setUp(self):
-        self.codec_name = 'exception_chaining_test'
+        self.codec_name = 'exception_notes_test'
         codecs.register(_get_test_codec)
         self.addCleanup(codecs.unregister, _get_test_codec)
 
@@ -2901,91 +2893,77 @@ class ExceptionChainingTest(unittest.TestCase):
         _TEST_CODECS[self.codec_name] = codec_info
 
     @contextlib.contextmanager
-    def assertWrapped(self, operation, exc_type, msg):
-        full_msg = r"{} with {!r} codec failed \({}: {}\)".format(
-                  operation, self.codec_name, exc_type.__name__, msg)
-        with self.assertRaisesRegex(exc_type, full_msg) as caught:
+    def assertNoted(self, operation, exc_type, msg):
+        full_msg = r"{} with {!r} codec failed".format(
+                  operation, self.codec_name)
+        with self.assertRaises(exc_type) as caught:
             yield caught
-        self.assertIsInstance(caught.exception.__cause__, exc_type)
-        self.assertIsNotNone(caught.exception.__cause__.__traceback__)
+        self.assertIn(full_msg, caught.exception.__notes__[0])
+        caught.exception.__notes__.clear()
 
     def raise_obj(self, *args, **kwds):
         # Helper to dynamically change the object raised by a test codec
         raise self.obj_to_raise
 
-    def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
+    def check_note(self, obj_to_raise, msg, exc_type=RuntimeError):
         self.obj_to_raise = obj_to_raise
         self.set_codec(self.raise_obj, self.raise_obj)
-        with self.assertWrapped("encoding", exc_type, msg):
+        with self.assertNoted("encoding", exc_type, msg):
             "str_input".encode(self.codec_name)
-        with self.assertWrapped("encoding", exc_type, msg):
+        with self.assertNoted("encoding", exc_type, msg):
             codecs.encode("str_input", self.codec_name)
-        with self.assertWrapped("decoding", exc_type, msg):
+        with self.assertNoted("decoding", exc_type, msg):
             b"bytes input".decode(self.codec_name)
-        with self.assertWrapped("decoding", exc_type, msg):
+        with self.assertNoted("decoding", exc_type, msg):
             codecs.decode(b"bytes input", self.codec_name)
 
     def test_raise_by_type(self):
-        self.check_wrapped(RuntimeError, "")
+        self.check_note(RuntimeError, "")
 
     def test_raise_by_value(self):
-        msg = "This should be wrapped"
-        self.check_wrapped(RuntimeError(msg), msg)
+        msg = "This should be noted"
+        self.check_note(RuntimeError(msg), msg)
 
     def test_raise_grandchild_subclass_exact_size(self):
-        msg = "This should be wrapped"
+        msg = "This should be noted"
         class MyRuntimeError(RuntimeError):
             __slots__ = ()
-        self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
+        self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
 
     def test_raise_subclass_with_weakref_support(self):
-        msg = "This should be wrapped"
+        msg = "This should be noted"
         class MyRuntimeError(RuntimeError):
             pass
-        self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
-
-    def check_not_wrapped(self, obj_to_raise, msg):
-        def raise_obj(*args, **kwds):
-            raise obj_to_raise
-        self.set_codec(raise_obj, raise_obj)
-        with self.assertRaisesRegex(RuntimeError, msg):
-            "str input".encode(self.codec_name)
-        with self.assertRaisesRegex(RuntimeError, msg):
-            codecs.encode("str input", self.codec_name)
-        with self.assertRaisesRegex(RuntimeError, msg):
-            b"bytes input".decode(self.codec_name)
-        with self.assertRaisesRegex(RuntimeError, msg):
-            codecs.decode(b"bytes input", self.codec_name)
+        self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)
 
-    def test_init_override_is_not_wrapped(self):
+    def test_init_override(self):
         class CustomInit(RuntimeError):
             def __init__(self):
                 pass
-        self.check_not_wrapped(CustomInit, "")
+        self.check_note(CustomInit, "")
 
-    def test_new_override_is_not_wrapped(self):
+    def test_new_override(self):
         class CustomNew(RuntimeError):
             def __new__(cls):
                 return super().__new__(cls)
-        self.check_not_wrapped(CustomNew, "")
+        self.check_note(CustomNew, "")
 
-    def test_instance_attribute_is_not_wrapped(self):
-        msg = "This should NOT be wrapped"
+    def test_instance_attribute(self):
+        msg = "This should be noted"
         exc = RuntimeError(msg)
         exc.attr = 1
-        self.check_not_wrapped(exc, "^{}$".format(msg))
+        self.check_note(exc, "^{}$".format(msg))
 
-    def test_non_str_arg_is_not_wrapped(self):
-        self.check_not_wrapped(RuntimeError(1), "1")
+    def test_non_str_arg(self):
+        self.check_note(RuntimeError(1), "1")
 
-    def test_multiple_args_is_not_wrapped(self):
+    def test_multiple_args(self):
         msg_re = r"^\('a', 'b', 'c'\)$"
-        self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
+        self.check_note(RuntimeError('a', 'b', 'c'), msg_re)
 
     # http://bugs.python.org/issue19609
-    def test_codec_lookup_failure_not_wrapped(self):
+    def test_codec_lookup_failure(self):
         msg = "^unknown encoding: {}$".format(self.codec_name)
-        # The initial codec lookup should not be wrapped
         with self.assertRaisesRegex(LookupError, msg):
             "str input".encode(self.codec_name)
         with self.assertRaisesRegex(LookupError, msg):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst b/Misc/NEWS.d/next/Core and Builtins/2023-03-03-23-21-16.gh-issue-102406.XLqYO3.rst
new file mode 100644 (file)
index 0000000..e0d061c
--- /dev/null
@@ -0,0 +1 @@
+:mod:`codecs` encoding/decoding errors now get the context information (which operation and which codecs) attached as :pep:`678` notes instead of through chaining a new instance of the exception.
index d69f7400ca6042cbc63801a6e0e92b768e66f03c..a355244cf997e6485dca4772e6657d84c8dc0ef3 100644 (file)
@@ -3764,113 +3764,3 @@ _PyException_AddNote(PyObject *exc, PyObject *note)
     return res;
 }
 
-/* Helper to do the equivalent of "raise X from Y" in C, but always using
- * the current exception rather than passing one in.
- *
- * We currently limit this to *only* exceptions that use the BaseException
- * tp_init and tp_new methods, since we can be reasonably sure we can wrap
- * those correctly without losing data and without losing backwards
- * compatibility.
- *
- * We also aim to rule out *all* exceptions that might be storing additional
- * state, whether by having a size difference relative to BaseException,
- * additional arguments passed in during construction or by having a
- * non-empty instance dict.
- *
- * We need to be very careful with what we wrap, since changing types to
- * a broader exception type would be backwards incompatible for
- * existing codecs, and with different init or new method implementations
- * may either not support instantiation with PyErr_Format or lose
- * information when instantiated that way.
- *
- * XXX (ncoghlan): This could be made more comprehensive by exploiting the
- * fact that exceptions are expected to support pickling. If more builtin
- * exceptions (e.g. AttributeError) start to be converted to rich
- * exceptions with additional attributes, that's probably a better approach
- * to pursue over adding special cases for particular stateful subclasses.
- *
- * Returns a borrowed reference to the new exception (if any), NULL if the
- * existing exception was left in place.
- */
-PyObject *
-_PyErr_TrySetFromCause(const char *format, ...)
-{
-    PyObject* msg_prefix;
-    PyObject *instance_args;
-    Py_ssize_t num_args, caught_type_size, base_exc_size;
-    va_list vargs;
-    int same_basic_size;
-
-    PyObject *exc = PyErr_GetRaisedException();
-    PyTypeObject *caught_type = Py_TYPE(exc);
-    /* Ensure type info indicates no extra state is stored at the C level
-     * and that the type can be reinstantiated using PyErr_Format
-     */
-    caught_type_size = caught_type->tp_basicsize;
-    base_exc_size = _PyExc_BaseException.tp_basicsize;
-    same_basic_size = (
-        caught_type_size == base_exc_size ||
-        (_PyType_SUPPORTS_WEAKREFS(caught_type) &&
-            (caught_type_size == base_exc_size + (Py_ssize_t)sizeof(PyObject *))
-        )
-    );
-    if (caught_type->tp_init != (initproc)BaseException_init ||
-        caught_type->tp_new != BaseException_new ||
-        !same_basic_size ||
-        caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) {
-        /* We can't be sure we can wrap this safely, since it may contain
-         * more state than just the exception type. Accordingly, we just
-         * leave it alone.
-         */
-        PyErr_SetRaisedException(exc);
-        return NULL;
-    }
-
-    /* Check the args are empty or contain a single string */
-    instance_args = ((PyBaseExceptionObject *)exc)->args;
-    num_args = PyTuple_GET_SIZE(instance_args);
-    if (num_args > 1 ||
-        (num_args == 1 &&
-         !PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) {
-        /* More than 1 arg, or the one arg we do have isn't a string
-         */
-        PyErr_SetRaisedException(exc);
-        return NULL;
-    }
-
-    /* Ensure the instance dict is also empty */
-    if (!_PyObject_IsInstanceDictEmpty(exc)) {
-        /* While we could potentially copy a non-empty instance dictionary
-         * to the replacement exception, for now we take the more
-         * conservative path of leaving exceptions with attributes set
-         * alone.
-         */
-        PyErr_SetRaisedException(exc);
-        return NULL;
-    }
-
-    /* For exceptions that we can wrap safely, we chain the original
-     * exception to a new one of the exact same type with an
-     * error message that mentions the additional details and the
-     * original exception.
-     *
-     * It would be nice to wrap OSError and various other exception
-     * types as well, but that's quite a bit trickier due to the extra
-     * state potentially stored on OSError instances.
-     */
-    va_start(vargs, format);
-    msg_prefix = PyUnicode_FromFormatV(format, vargs);
-    va_end(vargs);
-    if (msg_prefix == NULL) {
-        Py_DECREF(exc);
-        return NULL;
-    }
-
-    PyErr_Format((PyObject*)Py_TYPE(exc), "%U (%s: %S)",
-                 msg_prefix, Py_TYPE(exc)->tp_name, exc);
-    Py_DECREF(msg_prefix);
-    PyObject *new_exc = PyErr_GetRaisedException();
-    PyException_SetCause(new_exc, exc);
-    PyErr_SetRaisedException(new_exc);
-    return new_exc;
-}
index b2087b499dfdbaa5ab4d7dc61ce80c6ece4bf054..9d800f9856c2f785513ee3855c82b9d238fb59a4 100644 (file)
@@ -382,20 +382,27 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
     return codec_getstreamcodec(encoding, stream, errors, 3);
 }
 
-/* Helper that tries to ensure the reported exception chain indicates the
- * codec that was invoked to trigger the failure without changing the type
- * of the exception raised.
- */
 static void
-wrap_codec_error(const char *operation,
-                 const char *encoding)
+add_note_to_codec_error(const char *operation,
+                        const char *encoding)
 {
-    /* TrySetFromCause will replace the active exception with a suitably
-     * updated clone if it can, otherwise it will leave the original
-     * exception alone.
-     */
-    _PyErr_TrySetFromCause("%s with '%s' codec failed",
-                           operation, encoding);
+    PyObject *exc = PyErr_GetRaisedException();
+    if (exc == NULL) {
+        return;
+    }
+    PyObject *note = PyUnicode_FromFormat("%s with '%s' codec failed",
+                                          operation, encoding);
+    if (note == NULL) {
+        _PyErr_ChainExceptions1(exc);
+        return;
+    }
+    int res = _PyException_AddNote(exc, note);
+    Py_DECREF(note);
+    if (res < 0) {
+        _PyErr_ChainExceptions1(exc);
+        return;
+    }
+    PyErr_SetRaisedException(exc);
 }
 
 /* Encode an object (e.g. a Unicode object) using the given encoding
@@ -418,7 +425,7 @@ _PyCodec_EncodeInternal(PyObject *object,
 
     result = PyObject_Call(encoder, args, NULL);
     if (result == NULL) {
-        wrap_codec_error("encoding", encoding);
+        add_note_to_codec_error("encoding", encoding);
         goto onError;
     }
 
@@ -463,7 +470,7 @@ _PyCodec_DecodeInternal(PyObject *object,
 
     result = PyObject_Call(decoder, args, NULL);
     if (result == NULL) {
-        wrap_codec_error("decoding", encoding);
+        add_note_to_codec_error("decoding", encoding);
         goto onError;
     }
     if (!PyTuple_Check(result) ||