else:
self.write(BINUNICODE + pack("<I", n) + encoded)
else:
- obj = obj.replace("\\", "\\u005c")
- obj = obj.replace("\0", "\\u0000")
- obj = obj.replace("\n", "\\u000a")
- obj = obj.replace("\r", "\\u000d")
- obj = obj.replace("\x1a", "\\u001a") # EOF on DOS
- self.write(UNICODE + obj.encode('raw-unicode-escape') +
- b'\n')
+ # Escape what raw-unicode-escape doesn't, but memoize the original.
+ tmp = obj.replace("\\", "\\u005c")
+ tmp = tmp.replace("\0", "\\u0000")
+ tmp = tmp.replace("\n", "\\u000a")
+ tmp = tmp.replace("\r", "\\u000d")
+ tmp = tmp.replace("\x1a", "\\u001a") # EOF on DOS
+ self.write(UNICODE + tmp.encode('raw-unicode-escape') + b'\n')
self.memoize(obj)
dispatch[str] = save_str
t2 = self.loads(p)
self.assert_is_copy(t, t2)
+ def test_unicode_memoization(self):
+ # A str that appears twice in the pickled tuple must load back as one
+ # shared object, even when the pickler has to add escapes for it
+ # (the samples include a backslash, a newline and a carriage return).
+ for proto in protocols:
+ for s in '', 'xyz', 'xyz\n', 'x\\yz', 'x\xa1yz\r':
+ p = self.dumps((s, s), proto)
+ s1, s2 = self.loads(p)
+ self.assertIs(s1, s2)
+
def test_bytes(self):
for proto in protocols:
for s in b'', b'xyz', b'xyz'*100:
--- /dev/null
+When the same string is referenced more than once in the input to :mod:`pickle`,
+and the pure-Python implementation is in use,
+it is now guaranteed that only a single copy gets pickled
+and that a single object is shared when reloaded.
+Previously, in protocol 0, a string containing certain characters
+(e.g. a newline) was not memoized correctly, which resulted in duplicate objects.