]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-122163: Add notes for JSON serialization errors (GH-122165)
authorSerhiy Storchaka <storchaka@gmail.com>
Tue, 23 Jul 2024 17:02:54 +0000 (20:02 +0300)
committerGitHub <noreply@github.com>
Tue, 23 Jul 2024 17:02:54 +0000 (20:02 +0300)
This makes it possible to identify the source of the error.

Doc/whatsnew/3.14.rst
Include/internal/pycore_pyerrors.h
Lib/json/encoder.py
Lib/test/test_json/test_default.py
Lib/test/test_json/test_fail.py
Lib/test/test_json/test_recursion.py
Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst [new file with mode: 0644]
Modules/_json.c

index f45a44be0bf4a5de4971fc9ccd9468a5be8d357b..bd8bdcb6732fdef44da9aa34ce27283af11d5d0a 100644 (file)
@@ -112,6 +112,13 @@ Added support for converting any objects that have the
 :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`.
 (Contributed by Serhiy Storchaka in :gh:`82017`.)
 
+json
+----
+
+Add notes for JSON serialization errors that make it possible to identify
+the source of the error.
+(Contributed by Serhiy Storchaka in :gh:`122163`.)
+
 os
 --
 
index 15071638203457b904460f183335133d958be1b8..9835e495d176e7e3ec79b4e283d9878c814824b8 100644 (file)
@@ -161,7 +161,8 @@ extern PyObject* _Py_Offer_Suggestions(PyObject* exception);
 PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b,
                                           Py_ssize_t max_cost);
 
-void _PyErr_FormatNote(const char *format, ...);
+// Export for '_json' shared extension
+PyAPI_FUNC(void) _PyErr_FormatNote(const char *format, ...);
 
 /* Context manipulation (PEP 3134) */
 
index 323332f064edf8fda55182f5a987054c2d9a7cd9..b804224098e14f8f2418b8e29e78035009ca93b5 100644 (file)
@@ -293,37 +293,40 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
         else:
             newline_indent = None
             separator = _item_separator
-        first = True
-        for value in lst:
-            if first:
-                first = False
-            else:
+        for i, value in enumerate(lst):
+            if i:
                 buf = separator
-            if isinstance(value, str):
-                yield buf + _encoder(value)
-            elif value is None:
-                yield buf + 'null'
-            elif value is True:
-                yield buf + 'true'
-            elif value is False:
-                yield buf + 'false'
-            elif isinstance(value, int):
-                # Subclasses of int/float may override __repr__, but we still
-                # want to encode them as integers/floats in JSON. One example
-                # within the standard library is IntEnum.
-                yield buf + _intstr(value)
-            elif isinstance(value, float):
-                # see comment above for int
-                yield buf + _floatstr(value)
-            else:
-                yield buf
-                if isinstance(value, (list, tuple)):
-                    chunks = _iterencode_list(value, _current_indent_level)
-                elif isinstance(value, dict):
-                    chunks = _iterencode_dict(value, _current_indent_level)
+            try:
+                if isinstance(value, str):
+                    yield buf + _encoder(value)
+                elif value is None:
+                    yield buf + 'null'
+                elif value is True:
+                    yield buf + 'true'
+                elif value is False:
+                    yield buf + 'false'
+                elif isinstance(value, int):
+                    # Subclasses of int/float may override __repr__, but we still
+                    # want to encode them as integers/floats in JSON. One example
+                    # within the standard library is IntEnum.
+                    yield buf + _intstr(value)
+                elif isinstance(value, float):
+                    # see comment above for int
+                    yield buf + _floatstr(value)
                 else:
-                    chunks = _iterencode(value, _current_indent_level)
-                yield from chunks
+                    yield buf
+                    if isinstance(value, (list, tuple)):
+                        chunks = _iterencode_list(value, _current_indent_level)
+                    elif isinstance(value, dict):
+                        chunks = _iterencode_dict(value, _current_indent_level)
+                    else:
+                        chunks = _iterencode(value, _current_indent_level)
+                    yield from chunks
+            except GeneratorExit:
+                raise
+            except BaseException as exc:
+                exc.add_note(f'when serializing {type(lst).__name__} item {i}')
+                raise
         if newline_indent is not None:
             _current_indent_level -= 1
             yield '\n' + _indent * _current_indent_level
@@ -382,28 +385,34 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                 yield item_separator
             yield _encoder(key)
             yield _key_separator
-            if isinstance(value, str):
-                yield _encoder(value)
-            elif value is None:
-                yield 'null'
-            elif value is True:
-                yield 'true'
-            elif value is False:
-                yield 'false'
-            elif isinstance(value, int):
-                # see comment for int/float in _make_iterencode
-                yield _intstr(value)
-            elif isinstance(value, float):
-                # see comment for int/float in _make_iterencode
-                yield _floatstr(value)
-            else:
-                if isinstance(value, (list, tuple)):
-                    chunks = _iterencode_list(value, _current_indent_level)
-                elif isinstance(value, dict):
-                    chunks = _iterencode_dict(value, _current_indent_level)
+            try:
+                if isinstance(value, str):
+                    yield _encoder(value)
+                elif value is None:
+                    yield 'null'
+                elif value is True:
+                    yield 'true'
+                elif value is False:
+                    yield 'false'
+                elif isinstance(value, int):
+                    # see comment for int/float in _make_iterencode
+                    yield _intstr(value)
+                elif isinstance(value, float):
+                    # see comment for int/float in _make_iterencode
+                    yield _floatstr(value)
                 else:
-                    chunks = _iterencode(value, _current_indent_level)
-                yield from chunks
+                    if isinstance(value, (list, tuple)):
+                        chunks = _iterencode_list(value, _current_indent_level)
+                    elif isinstance(value, dict):
+                        chunks = _iterencode_dict(value, _current_indent_level)
+                    else:
+                        chunks = _iterencode(value, _current_indent_level)
+                    yield from chunks
+            except GeneratorExit:
+                raise
+            except BaseException as exc:
+                exc.add_note(f'when serializing {type(dct).__name__} item {key!r}')
+                raise
         if newline_indent is not None:
             _current_indent_level -= 1
             yield '\n' + _indent * _current_indent_level
@@ -436,8 +445,14 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
                 if markerid in markers:
                     raise ValueError("Circular reference detected")
                 markers[markerid] = o
-            o = _default(o)
-            yield from _iterencode(o, _current_indent_level)
+            newobj = _default(o)
+            try:
+                yield from _iterencode(newobj, _current_indent_level)
+            except GeneratorExit:
+                raise
+            except BaseException as exc:
+                exc.add_note(f'when serializing {type(o).__name__} object')
+                raise
             if markers is not None:
                 del markers[markerid]
     return _iterencode
index 3ce16684a08272df566ae62081c65888d91cd309..811880a15c80208979460f39995759e611d522e1 100644 (file)
@@ -8,6 +8,24 @@ class TestDefault:
             self.dumps(type, default=repr),
             self.dumps(repr(type)))
 
+    def test_bad_default(self):
+        def default(obj):
+            if obj is NotImplemented:
+                raise ValueError
+            if obj is ...:
+                return NotImplemented
+            if obj is type:
+                return collections
+            return [...]
+
+        with self.assertRaises(ValueError) as cm:
+            self.dumps(type, default=default)
+        self.assertEqual(cm.exception.__notes__,
+                         ['when serializing ellipsis object',
+                          'when serializing list item 0',
+                          'when serializing module object',
+                          'when serializing type object'])
+
     def test_ordereddict(self):
         od = collections.OrderedDict(a=1, b=2, c=3, d=4)
         od.move_to_end('b')
index a74240f1107de348084eb1baebd84be6ae56ce20..7c1696cc66d12b764513fa5b3d0d88047f9bf2d1 100644 (file)
@@ -100,8 +100,27 @@ class TestFail:
     def test_not_serializable(self):
         import sys
         with self.assertRaisesRegex(TypeError,
-                'Object of type module is not JSON serializable'):
+                'Object of type module is not JSON serializable') as cm:
             self.dumps(sys)
+        self.assertFalse(hasattr(cm.exception, '__notes__'))
+
+        with self.assertRaises(TypeError) as cm:
+            self.dumps([1, [2, 3, sys]])
+        self.assertEqual(cm.exception.__notes__,
+                         ['when serializing list item 2',
+                          'when serializing list item 1'])
+
+        with self.assertRaises(TypeError) as cm:
+            self.dumps((1, (2, 3, sys)))
+        self.assertEqual(cm.exception.__notes__,
+                         ['when serializing tuple item 2',
+                          'when serializing tuple item 1'])
+
+        with self.assertRaises(TypeError) as cm:
+            self.dumps({'a': {'b': sys}})
+        self.assertEqual(cm.exception.__notes__,
+                         ["when serializing dict item 'b'",
+                          "when serializing dict item 'a'"])
 
     def test_truncated_input(self):
         test_cases = [
index 164ff2013eb552c4acadc4f86761ee0c6d577e84..290207e9c15b8813c7913bf88188940a6ad8f3f8 100644 (file)
@@ -12,8 +12,8 @@ class TestRecursion:
         x.append(x)
         try:
             self.dumps(x)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__, ["when serializing list item 0"])
         else:
             self.fail("didn't raise ValueError on list recursion")
         x = []
@@ -21,8 +21,8 @@ class TestRecursion:
         x.append(y)
         try:
             self.dumps(x)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2)
         else:
             self.fail("didn't raise ValueError on alternating list recursion")
         y = []
@@ -35,8 +35,8 @@ class TestRecursion:
         x["test"] = x
         try:
             self.dumps(x)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"])
         else:
             self.fail("didn't raise ValueError on dict recursion")
         x = {}
@@ -60,8 +60,10 @@ class TestRecursion:
         enc.recurse = True
         try:
             enc.encode(JSONTestObject)
-        except ValueError:
-            pass
+        except ValueError as exc:
+            self.assertEqual(exc.__notes__,
+                             ["when serializing list item 0",
+                              "when serializing type object"])
         else:
             self.fail("didn't raise ValueError on default recursion")
 
diff --git a/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst b/Misc/NEWS.d/next/Library/2024-07-23-15-11-13.gh-issue-122163.4wRUuM.rst
new file mode 100644 (file)
index 0000000..a4625c2
--- /dev/null
@@ -0,0 +1,2 @@
+Add notes for JSON serialization errors that make it possible to identify
+the source of the error.
index c7fe1561bb10186e95be60a88a725ba2c2a9ac1f..9e29de0f22465f0e459a2889446fbaab5739d453 100644 (file)
@@ -11,6 +11,7 @@
 #include "Python.h"
 #include "pycore_ceval.h"           // _Py_EnterRecursiveCall()
 #include "pycore_runtime.h"         // _PyRuntime
+#include "pycore_pyerrors.h"        // _PyErr_FormatNote
 
 #include "pycore_global_strings.h"  // _Py_ID()
 #include <stdbool.h>                // bool
@@ -1461,6 +1462,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
 
         Py_DECREF(newobj);
         if (rv) {
+            _PyErr_FormatNote("when serializing %T object", obj);
             Py_XDECREF(ident);
             return -1;
         }
@@ -1477,7 +1479,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
 
 static int
 encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
-                         PyObject *key, PyObject *value,
+                         PyObject *dct, PyObject *key, PyObject *value,
                          PyObject *newline_indent,
                          PyObject *item_separator)
 {
@@ -1535,6 +1537,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir
         return -1;
     }
     if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
+        _PyErr_FormatNote("when serializing %T item %R", dct, key);
         return -1;
     }
     return 0;
@@ -1606,7 +1609,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
 
             key = PyTuple_GET_ITEM(item, 0);
             value = PyTuple_GET_ITEM(item, 1);
-            if (encoder_encode_key_value(s, writer, &first, key, value,
+            if (encoder_encode_key_value(s, writer, &first, dct, key, value,
                                          new_newline_indent,
                                          current_item_separator) < 0)
                 goto bail;
@@ -1616,7 +1619,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
     } else {
         Py_ssize_t pos = 0;
         while (PyDict_Next(dct, &pos, &key, &value)) {
-            if (encoder_encode_key_value(s, writer, &first, key, value,
+            if (encoder_encode_key_value(s, writer, &first, dct, key, value,
                                          new_newline_indent,
                                          current_item_separator) < 0)
                 goto bail;
@@ -1710,8 +1713,10 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
             if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
                 goto bail;
         }
-        if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
+        if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
+            _PyErr_FormatNote("when serializing %T item %zd", seq, i);
             goto bail;
+        }
     }
     if (ident != NULL) {
         if (PyDict_DelItem(s->markers, ident))