]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-148653: Fix some marshal errors related to recursive code objects (GH-14869...
author Serhiy Storchaka <storchaka@gmail.com>
Sat, 18 Apr 2026 09:29:38 +0000 (12:29 +0300)
committer GitHub <noreply@github.com>
Sat, 18 Apr 2026 09:29:38 +0000 (09:29 +0000)
(cherry picked from commit d496c637a3dac2cc5d51aa4a7ebd9a740143a719)

Forbid marshalling recursive code objects which
cannot be correctly unmarshalled.
Add multiple tests for recursive data structures.
(cherry picked from commit 2e37d836411e99cff7bb341ba14be5ea95fac08c)

Lib/test/test_marshal.py
Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst [new file with mode: 0644]
Python/marshal.c

diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py
index 4aeaaf6e176ffc23951a8d6b86bdcfba4f38f483..3965dd5332421b0e4d8849a9e38f31b9377ba4f1 100644 (file)
@@ -310,6 +310,96 @@ class BugsTestCase(unittest.TestCase):
         last.append([0])
         self.assertRaises(ValueError, marshal.dumps, head)
 
+    def test_reference_loop_list(self):
+        a = []
+        a.append(a)
+        for v in range(3):
+            self.assertRaises(ValueError, marshal.dumps, a, v)
+        for v in range(3, marshal.version + 1):
+            d = marshal.dumps(a, v)
+            b = marshal.loads(d)
+            self.assertIsInstance(b, list)
+            self.assertIs(b[0], b)
+
+    def test_reference_loop_dict(self):
+        a = {}
+        a[None] = a
+        for v in range(3):
+            self.assertRaises(ValueError, marshal.dumps, a, v)
+        for v in range(3, marshal.version + 1):
+            d = marshal.dumps(a, v)
+            b = marshal.loads(d)
+            self.assertIsInstance(b, dict)
+            self.assertIs(b[None], b)
+
+    def test_reference_loop_tuple(self):
+        a = ([],)
+        a[0].append(a)
+        for v in range(3):
+            self.assertRaises(ValueError, marshal.dumps, a, v)
+        for v in range(3, marshal.version + 1):
+            d = marshal.dumps(a, v)
+            b = marshal.loads(d)
+            self.assertIsInstance(b, tuple)
+            self.assertIsInstance(b[0], list)
+            self.assertIs(b[0][0], b)
+
+    def test_reference_loop_code(self):
+        def f():
+            return 1234.5
+        code = f.__code__
+        a = []
+        code = code.replace(co_consts=code.co_consts + (a,))
+        a.append(code)
+        for v in range(marshal.version + 1):
+            self.assertRaises(ValueError, marshal.dumps, code, v)
+
+    def test_loads_reference_loop_list(self):
+        data = b'\xdb\x01\x00\x00\x00r\x00\x00\x00\x00' # [<R>]
+        a = marshal.loads(data)
+        self.assertIsInstance(a, list)
+        self.assertIs(a[0], a)
+
+    def test_loads_reference_loop_dict(self):
+        data = b'\xfbNr\x00\x00\x00\x000' # {None: <R>}
+        a = marshal.loads(data)
+        self.assertIsInstance(a, dict)
+        self.assertIs(a[None], a)
+
+    def test_loads_abnormal_reference_loops(self):
+        # Indirect self-references of tuples.
+        data = b'\xa8\x01\x00\x00\x00[\x01\x00\x00\x00r\x00\x00\x00\x00' # ([<R>],)
+        a = marshal.loads(data)
+        self.assertIsInstance(a, tuple)
+        self.assertIsInstance(a[0], list)
+        self.assertIs(a[0][0], a)
+
+        data = b'\xa8\x01\x00\x00\x00{Nr\x00\x00\x00\x000' # ({None: <R>},)
+        a = marshal.loads(data)
+        self.assertIsInstance(a, tuple)
+        self.assertIsInstance(a[0], dict)
+        self.assertIs(a[0][None], a)
+
+        # Direct self-reference which cannot be created in Python.
+        data = b'\xa8\x01\x00\x00\x00r\x00\x00\x00\x00' # (<R>,)
+        a = marshal.loads(data)
+        self.assertIsInstance(a, tuple)
+        self.assertIs(a[0], a)
+
+        # Direct self-references which cannot be created in Python
+        # because of unhashability.
+        data = b'\xfbr\x00\x00\x00\x00N0' # {<R>: None}
+        self.assertRaises(TypeError, marshal.loads, data)
+        data = b'\xbc\x01\x00\x00\x00r\x00\x00\x00\x00' # {<R>}
+        self.assertRaises(TypeError, marshal.loads, data)
+
+        for data in [
+            # Direct self-references which cannot be created in Python.
+            b'\xbe\x01\x00\x00\x00r\x00\x00\x00\x00', # frozenset({<R>})
+        ]:
+            with self.subTest(data=data):
+                self.assertRaises(ValueError, marshal.loads, data)
+
     def test_exact_type_match(self):
         # Former bug:
         #   >>> class Int(int): pass
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst
new file mode 100644 (file)
index 0000000..2edcb35
--- /dev/null
@@ -0,0 +1,2 @@
+Forbid :mod:`marshalling <marshal>` recursive code objects
+which cannot be correctly unmarshalled.
diff --git a/Python/marshal.c b/Python/marshal.c
index 65481341bdf98bef269ea4b9a121df854a320a2e..c0eb5b4f3dd7fd138a42b77a7899445fd29dcde8 100644 (file)
@@ -310,7 +310,6 @@ static int
 w_ref(PyObject *v, char *flag, WFILE *p)
 {
     _Py_hashtable_entry_t *entry;
-    int w;
 
     if (p->version < 3 || p->hashtable == NULL)
         return 0; /* not writing object references */
@@ -327,20 +326,28 @@ w_ref(PyObject *v, char *flag, WFILE *p)
     entry = _Py_hashtable_get_entry(p->hashtable, v);
     if (entry != NULL) {
         /* write the reference index to the stream */
-        w = (int)(uintptr_t)entry->value;
+        uintptr_t w = (uintptr_t)entry->value;
+        if (w & 0x80000000LU) {
+            PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
+            goto err;
+        }
         /* we don't store "long" indices in the dict */
-        assert(0 <= w && w <= 0x7fffffff);
+        assert(w <= 0x7fffffff);
         w_byte(TYPE_REF, p);
-        w_long(w, p);
+        w_long((int)w, p);
         return 1;
     } else {
-        size_t s = p->hashtable->nentries;
+        size_t w = p->hashtable->nentries;
         /* we don't support long indices */
-        if (s >= 0x7fffffff) {
+        if (w >= 0x7fffffff) {
             PyErr_SetString(PyExc_ValueError, "too many objects");
             goto err;
         }
-        w = (int)s;
+        // Corresponding code should call w_complete() after
+        // writing the object.
+        if (PyCode_Check(v)) {
+            w |= 0x80000000LU;
+        }
         if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
                               (void *)(uintptr_t)w) < 0) {
             Py_DECREF(v);
@@ -354,6 +361,27 @@ err:
     return 1;
 }
 
+/* Mark object `v` as completely written to the stream.
+ *
+ * w_ref() sets bit 31 (0x80000000) in the reference index it stores for a
+ * code object, and raises ValueError when an object is referenced again
+ * while that bit is still set.  This function clears the bit, so that
+ * later occurrences of `v` are emitted as an ordinary TYPE_REF.
+ *
+ * Does nothing when object references are not being written (version < 3
+ * or no hashtable) or when `v` has no entry in the hashtable.
+ */
+static void
+w_complete(PyObject *v, WFILE *p)
+{
+    if (p->version < 3 || p->hashtable == NULL) {
+        return; /* not writing object references */
+    }
+    /* NOTE(review): presumably mirrors a matching shortcut in w_ref()
+       that skips objects with a single reference; that part of w_ref()
+       is not visible in this hunk -- confirm. */
+    if (Py_REFCNT(v) == 1) {
+        return;
+    }
+
+    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
+    if (entry == NULL) {
+        return;
+    }
+    uintptr_t w = (uintptr_t)entry->value;
+    /* The entry must still carry the "being written" flag. */
+    assert(w & 0x80000000LU);
+    /* Build the mask in uintptr_t so that its width does not depend on
+       the width of unsigned long. */
+    w &= ~(uintptr_t)0x80000000LU;
+    entry->value = (void *)w;
+}
+
 static void
 w_complex_object(PyObject *v, char flag, WFILE *p);
 
@@ -603,6 +631,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
         w_object(co->co_linetable, p);
         w_object(co->co_exceptiontable, p);
         Py_DECREF(co_code);
+        w_complete(v, p);
     }
     else if (PyObject_CheckBuffer(v)) {
         /* Write unknown bytes-like objects as a bytes object */