From: Antoine Pitrou Date: Mon, 29 Aug 2011 21:14:53 +0000 (+0200) Subject: Issue #11564: Avoid crashes when trying to pickle huge objects or containers X-Git-Tag: v3.3.0a1~1603 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ee763e2acc469fa2f423440517b9bc227fbbe79c;p=thirdparty%2FPython%2Fcpython.git Issue #11564: Avoid crashes when trying to pickle huge objects or containers (more than 2**31 items). Instead, in most cases, an OverflowError is raised. --- ee763e2acc469fa2f423440517b9bc227fbbe79c diff --cc Lib/test/pickletester.py index e862d0771edb,2b1fdd222a38..807221a5fe14 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@@ -2,11 -2,14 +2,15 @@@ import i import unittest import pickle import pickletools + import sys import copyreg +import weakref from http.cookies import SimpleCookie - from test.support import TestFailed, TESTFN, run_with_locale, no_tracing + from test.support import ( - TestFailed, TESTFN, run_with_locale, ++ TestFailed, TESTFN, run_with_locale, no_tracing, + _2G, _4G, precisionbigmemtest, + ) from pickle import bytes_types @@@ -1118,15 -1104,99 +1124,108 @@@ class AbstractPickleTests(unittest.Test empty = self.loads(b'\x80\x03U\x00q\x00.', encoding='koi8-r') self.assertEqual(empty, '') + def test_int_pickling_efficiency(self): + # Test compacity of int representation (see issue #12744) + for proto in protocols: + sizes = [len(self.dumps(2**n, proto)) for n in range(70)] + # the size function is monotonic + self.assertEqual(sorted(sizes), sizes) + if proto >= 2: + self.assertLessEqual(sizes[-1], 14) + + def check_negative_32b_binXXX(self, dumped): + if sys.maxsize > 2**32: + self.skipTest("test is only meaningful on 32-bit builds") + # XXX Pure Python pickle reads lengths as signed and passes + # them directly to read() (hence the EOFError) + with self.assertRaises((pickle.UnpicklingError, EOFError, + ValueError, OverflowError)): + self.loads(dumped) + + def test_negative_32b_binbytes(self): + # On 32-bit builds, a BINBYTES of 2**31 or more is refused + self.check_negative_32b_binXXX(b'\x80\x03B\xff\xff\xff\xffxyzq\x00.') + + def test_negative_32b_binunicode(self): + # On 32-bit builds, a BINUNICODE of 2**31 or more is refused + self.check_negative_32b_binXXX(b'\x80\x03X\xff\xff\xff\xffxyzq\x00.') + + + class BigmemPickleTests(unittest.TestCase): + + # Binary protocols can serialize longs of up to 2GB-1 + + @precisionbigmemtest(size=_2G, memuse=1 + 1, dry_run=False) + def test_huge_long_32b(self, size): + data = 1 << (8 * size) + try: + for proto in protocols: + if proto < 2: + continue + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + + # Protocol 3 can serialize up to 4GB-1 as a bytes object + # (older protocols don't have a dedicated opcode for bytes and are + # too inefficient) + + @precisionbigmemtest(size=_2G, memuse=1 + 1, dry_run=False) + def test_huge_bytes_32b(self, size): + data = b"abcd" * (size // 4) + try: + for proto in protocols: + if proto < 3: + continue + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:15]) + self.assertTrue(b"abcd" in pickled[-15:]) + finally: + pickled = None + finally: + data = None + + @precisionbigmemtest(size=_4G, memuse=1 + 1, dry_run=False) + def test_huge_bytes_64b(self, size): + data = b"a" * size + try: + for proto in protocols: + if proto < 3: + continue + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + + # All protocols use 1-byte per printable ASCII character; we add another + # byte because the encoded form has to be copied into the internal buffer. + + @precisionbigmemtest(size=_2G, memuse=2 + character_size, dry_run=False) + def test_huge_str_32b(self, size): + data = "abcd" * (size // 4) + try: + for proto in protocols: + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:15]) + self.assertTrue(b"abcd" in pickled[-15:]) + finally: + pickled = None + finally: + data = None + + @precisionbigmemtest(size=_4G, memuse=1 + character_size, dry_run=False) + def test_huge_str_64b(self, size): + data = "a" * size + try: + for proto in protocols: + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + finally: + data = None + # Test classes for reduce_ex diff --cc Lib/test/support.py index b3989e52a887,8e6ca2a5412c..d00a51324a1f --- a/Lib/test/support.py +++ b/Lib/test/support.py @@@ -1142,12 -1089,7 +1142,12 @@@ def bigmemtest(minsize, memuse) return wrapper return decorator - def precisionbigmemtest(size, memuse): + def precisionbigmemtest(size, memuse, dry_run=True): + """Decorator for bigmem tests that need exact sizes. + + Like bigmemtest, but without the size scaling upward to fill available + memory. + """ def decorator(f): def wrapper(self): size = wrapper.size diff --cc Misc/NEWS index 17332914de11,61f5bd2f314a..c8c1fa698418 --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -268,51 -91,6 +268,54 @@@ Core and Builtin Library ------- ++- Issue #11564: Avoid crashes when trying to pickle huge objects or containers ++ (more than 2**31 items). Instead, in most cases, an OverflowError is raised. ++ +- Issue #12287: Fix a stack corruption in ossaudiodev module when the FD is + greater than FD_SETSIZE. + +- Issue #12839: Fix crash in zlib module due to version mismatch. + Fix by Richard M. Tew. + +- Issue #9923: The mailcap module now correctly uses the platform path + separator for the MAILCAP environment variable on non-POSIX platforms. + +- Issue #12835: Follow up to #6560 that unconditionally prevents use of the + unencrypted sendmsg/recvmsg APIs on SSL wrapped sockets. Patch by David + Watson. + +- Issue #12803: SSLContext.load_cert_chain() now accepts a password argument + to be used if the private key is encrypted. Patch by Adam Simpkins. + +- Issue #11657: Fix sending file descriptors over 255 over a multiprocessing + Pipe. + +- Issue #12811: tabnanny.check() now promptly closes checked files. Patch by + Anthony Briggs. + +- Issue #6560: The sendmsg/recvmsg API is now exposed by the socket module + when provided by the underlying platform, supporting processing of + ancillary data in pure Python code. Patch by David Watson and Heiko Wundram. + +- Issue #12326: On Linux, sys.platform doesn't contain the major version + anymore. It is now always 'linux', instead of 'linux2' or 'linux3' depending + on the Linux version used to build Python. + +- Issue #12213: Fix a buffering bug with interleaved reads and writes that + could appear on BufferedRandom streams. + +- Issue #12778: Reduce memory consumption when JSON-encoding a large + container of many small objects. + +- Issue #12650: Fix a race condition where a subprocess.Popen could leak + resources (FD/zombie) when killed at the wrong time. + +- Issue #12744: Fix inefficient representation of integers between 2**31 and + 2**63 on systems with a 64-bit C "long". + +- Issue #12646: Add an 'eof' attribute to zlib.Decompress, to make it easier to + detect truncated input streams. + - Issue #11513: Fix exception handling ``tarfile.TarFile.gzopen()`` when the file cannot be opened.