git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-148241: Fix json serialization for str subclasses (#148249)
author Victor Stinner <vstinner@python.org>
Thu, 9 Apr 2026 11:50:44 +0000 (13:50 +0200)
committer GitHub <noreply@github.com>
Thu, 9 Apr 2026 11:50:44 +0000 (13:50 +0200)
Fix json serialization: no longer call str(obj) on str subclasses.

Replace PyUnicodeWriter_WriteStr() with PyUnicodeWriter_WriteASCII()
and private _PyUnicodeWriter_WriteStr().

Lib/test/test_json/test_dump.py
Lib/test/test_json/test_encode_basestring_ascii.py
Lib/test/test_json/test_enum.py
Misc/NEWS.d/next/Library/2026-04-08-14-19-17.gh-issue-148241.fO_QT4.rst [new file with mode: 0644]
Modules/_json.c

diff --git a/Lib/test/test_json/test_dump.py b/Lib/test/test_json/test_dump.py
index 9880698455ca5e4484131d486cfe7f01163f1585..850e5ceeba0c899b31c78f05a0e0cf54e90a7585 100644 (file)
@@ -77,6 +77,36 @@ class TestDump:
         d[1337] = "true.dat"
         self.assertEqual(self.dumps(d, sort_keys=True), '{"1337": "true.dat"}')
 
+    def test_dumps_str_subclass(self):
+        # Don't call obj.__str__() on str subclasses
+
+        # str subclass which returns a different string on str(obj)
+        class StrSubclass(str):
+            def __str__(self):
+                return "StrSubclass"
+
+        obj = StrSubclass('ascii')
+        self.assertEqual(self.dumps(obj), '"ascii"')
+        self.assertEqual(self.dumps([obj]), '["ascii"]')
+        self.assertEqual(self.dumps({'key': obj}), '{"key": "ascii"}')
+
+        obj = StrSubclass('escape\n')
+        self.assertEqual(self.dumps(obj), '"escape\\n"')
+        self.assertEqual(self.dumps([obj]), '["escape\\n"]')
+        self.assertEqual(self.dumps({'key': obj}), '{"key": "escape\\n"}')
+
+        obj = StrSubclass('nonascii:é')
+        self.assertEqual(self.dumps(obj, ensure_ascii=False),
+                         '"nonascii:é"')
+        self.assertEqual(self.dumps([obj], ensure_ascii=False),
+                         '["nonascii:é"]')
+        self.assertEqual(self.dumps({'key': obj}, ensure_ascii=False),
+                         '{"key": "nonascii:é"}')
+        self.assertEqual(self.dumps(obj), '"nonascii:\\u00e9"')
+        self.assertEqual(self.dumps([obj]), '["nonascii:\\u00e9"]')
+        self.assertEqual(self.dumps({'key': obj}),
+                         '{"key": "nonascii:\\u00e9"}')
+
 
 class TestPyDump(TestDump, PyTest): pass
 
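diff --git a/Lib/test/test_json/test_encode_basestring_ascii.py b/Lib/test/test_json/test_encode_basestring_ascii.py
index c90d3e968e5ef9c8d5e47b00ba3e55172b09bb0a..1b5dfcfde01d1177764374f68058a5e62719d1fb 100644 (file)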
@@ -3,6 +3,11 @@ from test.test_json import PyTest, CTest
 from test.support import bigaddrspacetest
 
 
+# str subclass which returns a different string on str(obj)
+class StrSubclass(str):
+    def __str__(self):
+        return "StrSubclass"
+
 CASES = [
     ('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
     ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
@@ -14,6 +19,8 @@ CASES = [
     ('\U0001d120', '"\\ud834\\udd20"'),
     ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
     ("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
+    # Don't call obj.__str__() on str subclasses
+    (StrSubclass('ascii'), '"ascii"'),
 ]
 
 class TestEncodeBasestringAscii:
diff --git a/Lib/test/test_json/test_enum.py b/Lib/test/test_json/test_enum.py
index 196229897bd6e3db667f9324482f4592b2b6a516..518c3e112006592564ee3aaa2ea663aafa6ecd5f 100644 (file)
@@ -31,6 +31,9 @@ class WeirdNum(float, Enum):
     neg_inf = NEG_INF
     nan = NAN
 
+class StringEnum(str, Enum):
+    COLOR = "color"
+
 class TestEnum:
 
     def test_floats(self):
@@ -116,5 +119,11 @@ class TestEnum:
         self.assertEqual(nd['j'], NEG_INF)
         self.assertTrue(isnan(nd['n']))
 
+    def test_str_enum(self):
+        obj = StringEnum.COLOR
+        self.assertEqual(self.dumps(obj), '"color"')
+        self.assertEqual(self.dumps([obj]), '["color"]')
+        self.assertEqual(self.dumps({'key': obj}), '{"key": "color"}')
+
 class TestPyEnum(TestEnum, PyTest): pass
 class TestCEnum(TestEnum, CTest): pass
diff --git a/Misc/NEWS.d/next/Library/2026-04-08-14-19-17.gh-issue-148241.fO_QT4.rst b/Misc/NEWS.d/next/Library/2026-04-08-14-19-17.gh-issue-148241.fO_QT4.rst
new file mode 100644 (file)
index 0000000..bf8d0e4
--- /dev/null
@@ -0,0 +1,2 @@
+:mod:`json`: Fix serialization: no longer call ``str(obj)`` on :class:`str`
+subclasses. Patch by Victor Stinner.
diff --git a/Modules/_json.c b/Modules/_json.c
index 36614138501e791d68b54f184173462fbf617420..a20466de8c50e4624a5ebdab52c91bd76efb6863 100644 (file)
@@ -258,7 +258,10 @@ write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
         if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
             return -1;
         }
-        if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
+        // gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj)
+        // on str subclasses
+        assert(PyUnicode_IS_ASCII(pystr));
+        if (PyUnicodeWriter_WriteASCII(writer, input, input_chars) < 0) {
             return -1;
         }
         return PyUnicodeWriter_WriteChar(writer, '"');
@@ -399,7 +402,9 @@ write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
         if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
             return -1;
         }
-        if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
+        // gh-148241: Avoid PyUnicodeWriter_WriteStr() which calls str(obj)
+        // on str subclasses
+        if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, pystr) < 0) {
             return -1;
         }
         return PyUnicodeWriter_WriteChar(writer, '"');