From: Serhiy Storchaka Date: Sat, 18 Apr 2026 09:29:38 +0000 (+0300) Subject: [3.13] gh-148653: Fix some marshal errors related to recursive code objects (GH-14869... X-Git-Url: http://git.ipfire.org/index.cgi?a=commitdiff_plain;h=ac1c1e7ef05a15fdabdf38d11cbfb02a93ad6cfa;p=thirdparty%2FPython%2Fcpython.git [3.13] gh-148653: Fix some marshal errors related to recursive code objects (GH-148698) (GH-148711) (GH-148713) (cherry picked from commit d496c637a3dac2cc5d51aa4a7ebd9a740143a719) Forbid marshalling recursive code objects which cannot be correctly unmarshalled. Add multiple tests for recursive data structures. (cherry picked from commit 2e37d836411e99cff7bb341ba14be5ea95fac08c) --- diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 4aeaaf6e176f..3965dd533242 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -310,6 +310,96 @@ class BugsTestCase(unittest.TestCase): last.append([0]) self.assertRaises(ValueError, marshal.dumps, head) + def test_reference_loop_list(self): + a = [] + a.append(a) + for v in range(3): + self.assertRaises(ValueError, marshal.dumps, a, v) + for v in range(3, marshal.version + 1): + d = marshal.dumps(a, v) + b = marshal.loads(d) + self.assertIsInstance(b, list) + self.assertIs(b[0], b) + + def test_reference_loop_dict(self): + a = {} + a[None] = a + for v in range(3): + self.assertRaises(ValueError, marshal.dumps, a, v) + for v in range(3, marshal.version + 1): + d = marshal.dumps(a, v) + b = marshal.loads(d) + self.assertIsInstance(b, dict) + self.assertIs(b[None], b) + + def test_reference_loop_tuple(self): + a = ([],) + a[0].append(a) + for v in range(3): + self.assertRaises(ValueError, marshal.dumps, a, v) + for v in range(3, marshal.version + 1): + d = marshal.dumps(a, v) + b = marshal.loads(d) + self.assertIsInstance(b, tuple) + self.assertIsInstance(b[0], list) + self.assertIs(b[0][0], b) + + def test_reference_loop_code(self): + def f(): + return 1234.5 + code = f.__code__ + a = [] + code = code.replace(co_consts=code.co_consts + (a,)) + a.append(code) + for v in range(marshal.version + 1): + self.assertRaises(ValueError, marshal.dumps, code, v) + + def test_loads_reference_loop_list(self): + data = b'\xdb\x01\x00\x00\x00r\x00\x00\x00\x00' # [] + a = marshal.loads(data) + self.assertIsInstance(a, list) + self.assertIs(a[0], a) + + def test_loads_reference_loop_dict(self): + data = b'\xfbNr\x00\x00\x00\x000' # {None: } + a = marshal.loads(data) + self.assertIsInstance(a, dict) + self.assertIs(a[None], a) + + def test_loads_abnormal_reference_loops(self): + # Indirect self-references of tuples. + data = b'\xa8\x01\x00\x00\x00[\x01\x00\x00\x00r\x00\x00\x00\x00' # ([],) + a = marshal.loads(data) + self.assertIsInstance(a, tuple) + self.assertIsInstance(a[0], list) + self.assertIs(a[0][0], a) + + data = b'\xa8\x01\x00\x00\x00{Nr\x00\x00\x00\x000' # ({None: },) + a = marshal.loads(data) + self.assertIsInstance(a, tuple) + self.assertIsInstance(a[0], dict) + self.assertIs(a[0][None], a) + + # Direct self-reference which cannot be created in Python. + data = b'\xa8\x01\x00\x00\x00r\x00\x00\x00\x00' # (,) + a = marshal.loads(data) + self.assertIsInstance(a, tuple) + self.assertIs(a[0], a) + + # Direct self-references which cannot be created in Python + # because of unhashability. + data = b'\xfbr\x00\x00\x00\x00N0' # {: None} + self.assertRaises(TypeError, marshal.loads, data) + data = b'\xbc\x01\x00\x00\x00r\x00\x00\x00\x00' # {} + self.assertRaises(TypeError, marshal.loads, data) + + for data in [ + # Direct self-references which cannot be created in Python. + b'\xbe\x01\x00\x00\x00r\x00\x00\x00\x00', # frozenset({}) + ]: + with self.subTest(data=data): + self.assertRaises(ValueError, marshal.loads, data) + def test_exact_type_match(self): # Former bug: # >>> class Int(int): pass diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst new file mode 100644 index 000000000000..2edcb35aa98a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst @@ -0,0 +1,2 @@ +Forbid :mod:`marshalling ` recursive code objects +which cannot be correctly unmarshalled. diff --git a/Python/marshal.c b/Python/marshal.c index 65481341bdf9..c0eb5b4f3dd7 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -310,7 +310,6 @@ static int w_ref(PyObject *v, char *flag, WFILE *p) { _Py_hashtable_entry_t *entry; - int w; if (p->version < 3 || p->hashtable == NULL) return 0; /* not writing object references */ @@ -327,20 +326,28 @@ w_ref(PyObject *v, char *flag, WFILE *p) entry = _Py_hashtable_get_entry(p->hashtable, v); if (entry != NULL) { /* write the reference index to the stream */ - w = (int)(uintptr_t)entry->value; + uintptr_t w = (uintptr_t)entry->value; + if (w & 0x80000000LU) { + PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v); + goto err; + } /* we don't store "long" indices in the dict */ - assert(0 <= w && w <= 0x7fffffff); + assert(w <= 0x7fffffff); w_byte(TYPE_REF, p); - w_long(w, p); + w_long((int)w, p); return 1; } else { - size_t s = p->hashtable->nentries; + size_t w = p->hashtable->nentries; /* we don't support long indices */ - if (s >= 0x7fffffff) { + if (w >= 0x7fffffff) { PyErr_SetString(PyExc_ValueError, "too many objects"); goto err; } - w = (int)s; + // Corresponding code should call w_complete() after + // writing the object. + if (PyCode_Check(v)) { + w |= 0x80000000LU; + } if (_Py_hashtable_set(p->hashtable, Py_NewRef(v), (void *)(uintptr_t)w) < 0) { Py_DECREF(v); @@ -354,6 +361,27 @@ err: return 1; } +static void +w_complete(PyObject *v, WFILE *p) +{ + if (p->version < 3 || p->hashtable == NULL) { + return; + } + if (Py_REFCNT(v) == 1) { + return; + } + + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v); + if (entry == NULL) { + return; + } + assert(entry != NULL); + uintptr_t w = (uintptr_t)entry->value; + assert(w & 0x80000000LU); + w &= ~0x80000000LU; + entry->value = (void *)(uintptr_t)w; +} + static void w_complex_object(PyObject *v, char flag, WFILE *p); @@ -603,6 +631,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_object(co->co_linetable, p); w_object(co->co_exceptiontable, p); Py_DECREF(co_code); + w_complete(v, p); } else if (PyObject_CheckBuffer(v)) { /* Write unknown bytes-like objects as a bytes object */