]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.14] gh-148914: Fix memoization of in-band PickleBuffer in the Python implementatio...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Sat, 2 May 2026 09:30:16 +0000 (11:30 +0200)
committer: GitHub <noreply@github.com>
Sat, 2 May 2026 09:30:16 +0000 (09:30 +0000)
Previously, repeated references to the same PickleBuffer did not
preserve identity. Also, an empty writable PickleBuffer memoized an empty
bytearray object in place of b'', which is a singleton in CPython, so
subsequent references to b'' were unpickled as an empty bytearray object.
(cherry picked from commit b89735625dff07005c31bdc86cbe7113ef1b59d0)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Lib/pickle.py
Lib/test/pickletester.py
Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst [new file with mode: 0644]

index beaefae0479d3ca15347f6b1393b57e19c01b2ef..7b951858604bb93f101873e69ca91c8d2eae6c03 100644 (file)
@@ -904,17 +904,11 @@ class _Pickler:
                     # Write data in-band
                     # XXX The C implementation avoids a copy here
                     buf = m.tobytes()
-                    in_memo = id(buf) in self.memo
                     if m.readonly:
-                        if in_memo:
-                            self._save_bytes_no_memo(buf)
-                        else:
-                            self.save_bytes(buf)
+                        self._save_bytes_no_memo(buf)
                     else:
-                        if in_memo:
-                            self._save_bytearray_no_memo(buf)
-                        else:
-                            self.save_bytearray(buf)
+                        self._save_bytearray_no_memo(buf)
+                    self.memoize(obj)
                 else:
                     # Write data out-of-band
                     self.write(NEXT_BUFFER)
index cd9093c5dffcae47fe4ca08f17e18e8b835b7bff..0dc4749ac5fa33d0fa1e1f4acbb538a48552d5b8 100644 (file)
@@ -2855,6 +2855,51 @@ class AbstractPickleTests:
                         self.assertIsNot(b2a, b2b)
                         self.assert_is_copy(b2a, b2b)
 
+    def test_picklebuffer_memoization(self):
+        if self.py_version < (3, 8):
+            self.skipTest('not supported in Python < 3.8')
+        array_types = [bytes, bytearray]
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            for array_type in array_types:
+                for s in b'', b'xyz', b'xyz'*100:
+                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
+                        b = pickle.PickleBuffer(array_type(s))
+                        p = self.dumps((b, b), proto)
+                        b1, b2 = self.loads(p)
+                        self.assertIs(b1, b2)
+
+                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
+                        b = array_type(s)
+                        b1a = pickle.PickleBuffer(b)
+                        b2a = pickle.PickleBuffer(b)
+                        p = self.dumps((b1a, b2a), proto)
+                        b1b, b2b = self.loads(p)
+                        if array_type is not bytes:
+                            self.assertIsNot(b1b, b2b)
+                        self.assert_is_copy(b1b, b)
+                        self.assert_is_copy(b2b, b)
+
+    def test_empty_picklebuffer_memoization(self):
+        # gh-148914: Empty writable PickleBuffer memoized an empty bytearray
+        # with the id of b'' (a singleton in CPython).
+        if self.py_version < (3, 8):
+            self.skipTest('not supported in Python < 3.8')
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            for readonly in False, True:
+                with self.subTest(proto=proto, readonly=readonly):
+                    b = b''
+                    ba = bytearray()
+                    buf = pickle.PickleBuffer(b if readonly else ba)
+                    p = self.dumps((buf, b, ba), proto)
+                    buf, b, ba = self.loads(p)
+                    array_type = bytes if readonly else bytearray
+                    self.assertIsInstance(buf, array_type)
+                    self.assertIsInstance(b, bytes)
+                    self.assertIsInstance(ba, bytearray)
+                    self.assertEqual(buf, b'')
+                    self.assertEqual(b, b'')
+                    self.assertEqual(ba, b'')
+
     def test_ints(self):
         for proto in protocols:
             n = sys.maxsize
diff --git a/Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst b/Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst
new file mode 100644 (file)
index 0000000..8348aad
--- /dev/null
@@ -0,0 +1,6 @@
+Fix memoization of in-band :class:`~pickle.PickleBuffer` in the Python
+implementation of :mod:`pickle`. Previously, identical
+:class:`!PickleBuffer`\ s did not preserve identity, and an empty writable
+:class:`!PickleBuffer` memoized an empty bytearray object in place of
+``b''``, so subsequent references to ``b''`` were unpickled as an empty
+bytearray object.