]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-148914: Fix memoization of in-band PickleBuffer in the Python implementation ...
authorSerhiy Storchaka <storchaka@gmail.com>
Sat, 2 May 2026 09:04:05 +0000 (12:04 +0300)
committerGitHub <noreply@github.com>
Sat, 2 May 2026 09:04:05 +0000 (12:04 +0300)
Previously, identical PickleBuffers did not preserve identity.
Also, empty writable PickleBuffer memoized an empty bytearray object
in place of b'' which is a singleton in CPython, so the following
references to b'' were unpickled as an empty bytearray object.

Lib/pickle.py
Lib/test/pickletester.py
Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst [new file with mode: 0644]

index 3e7cf25cb053379811456285f088c613cad81b88..95836afdc2b43e5f94a54e4079c68d97d6923c1d 100644 (file)
@@ -920,17 +920,11 @@ class _Pickler:
                     # Write data in-band
                     # XXX The C implementation avoids a copy here
                     buf = m.tobytes()
-                    in_memo = id(buf) in self.memo
                     if m.readonly:
-                        if in_memo:
-                            self._save_bytes_no_memo(buf)
-                        else:
-                            self.save_bytes(buf)
+                        self._save_bytes_no_memo(buf)
                     else:
-                        if in_memo:
-                            self._save_bytearray_no_memo(buf)
-                        else:
-                            self.save_bytearray(buf)
+                        self._save_bytearray_no_memo(buf)
+                    self.memoize(obj)
                 else:
                     # Write data out-of-band
                     self.write(NEXT_BUFFER)
index c2018c9785b9b305e2ee3142926b112e7144d781..9ba498ce8f575deb4f4a04ffa99d9106247a08f2 100644 (file)
@@ -3100,6 +3100,51 @@ class AbstractPickleTests:
                         self.assertIsNot(b2a, b2b)
                         self.assert_is_copy(b2a, b2b)
 
+    def test_picklebuffer_memoization(self):
+        if self.py_version < (3, 8):
+            self.skipTest('not supported in Python < 3.8')
+        array_types = [bytes, bytearray]
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            for array_type in array_types:
+                for s in b'', b'xyz', b'xyz'*100:
+                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
+                        b = pickle.PickleBuffer(array_type(s))
+                        p = self.dumps((b, b), proto)
+                        b1, b2 = self.loads(p)
+                        self.assertIs(b1, b2)
+
+                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
+                        b = array_type(s)
+                        b1a = pickle.PickleBuffer(b)
+                        b2a = pickle.PickleBuffer(b)
+                        p = self.dumps((b1a, b2a), proto)
+                        b1b, b2b = self.loads(p)
+                        if array_type is not bytes:
+                            self.assertIsNot(b1b, b2b)
+                        self.assert_is_copy(b1b, b)
+                        self.assert_is_copy(b2b, b)
+
+    def test_empty_picklebuffer_memoization(self):
+        # gh-148914: Empty writable PickleBuffer memoized an empty bytearray
+        # with the id of b'' (a singleton in CPython).
+        if self.py_version < (3, 8):
+            self.skipTest('not supported in Python < 3.8')
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            for readonly in False, True:
+                with self.subTest(proto=proto, readonly=readonly):
+                    b = b''
+                    ba = bytearray()
+                    buf = pickle.PickleBuffer(b if readonly else ba)
+                    p = self.dumps((buf, b, ba), proto)
+                    buf, b, ba = self.loads(p)
+                    array_type = bytes if readonly else bytearray
+                    self.assertIsInstance(buf, array_type)
+                    self.assertIsInstance(b, bytes)
+                    self.assertIsInstance(ba, bytearray)
+                    self.assertEqual(buf, b'')
+                    self.assertEqual(b, b'')
+                    self.assertEqual(ba, b'')
+
     def test_ints(self):
         for proto in protocols:
             n = sys.maxsize
diff --git a/Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst b/Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst
new file mode 100644 (file)
index 0000000..8348aad
--- /dev/null
@@ -0,0 +1,6 @@
+Fix memoization of in-band :class:`~pickle.PickleBuffer` in the Python
+implementation of :mod:`pickle`. Previously, identical
+:class:`!PickleBuffer`\ s did not preserve identity, and empty writable
+:class:`!PickleBuffer` memoized an empty bytearray object in place of
+``b''``, so the following references to ``b''`` were unpickled as an empty
+bytearray object.