bpo-38256: Fix binascii.crc32 large input. (GH-32000) (GH-32013) (GH-32015)

author Gregory P. Smith <greg@krypto.org>

Mon, 21 Mar 2022 06:34:45 +0000 (23:34 -0700)

committer GitHub <noreply@github.com>

Mon, 21 Mar 2022 06:34:45 +0000 (23:34 -0700)
author Gregory P. Smith <greg@krypto.org>
Mon, 21 Mar 2022 06:34:45 +0000 (23:34 -0700)
committer GitHub <noreply@github.com>
Mon, 21 Mar 2022 06:34:45 +0000 (23:34 -0700)
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py

index 45327953a7701ad1741f54a7cd54b62527528d0f..745329102f77bb4fc753976689378da89e259fee 100644 (file)
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -5,6 +5,8 @@ import binascii
  import array
  import re
  from test import support
+from test.support import bigmemtest, _1G, _4G, warnings_helper
+
  
  # Note: "*_hex" functions are aliases for "(un)hexlify"
  b2a_functions = ['b2a_base64', 'b2a_hex', 'b2a_hqx', 'b2a_qp', 'b2a_uu',
@@ -448,6 +450,14 @@ class BytearrayBinASCIITest(BinASCIITest):
  class MemoryviewBinASCIITest(BinASCIITest):
      type2test = memoryview
  
+class ChecksumBigBufferTestCase(unittest.TestCase):
+    """bpo-38256 - check that inputs >=4 GiB are handled correctly."""
+
+    @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
+    def test_big_buffer(self, size):
+        data = b"nyan" * (_1G + 1)
+        self.assertEqual(binascii.crc32(data), 1044521549)
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst

new file mode 100644 (file)

index 0000000..d9b5751
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst
@@ -0,0 +1,5 @@
+Fix :func:`binascii.crc32` when it is compiled to use zlib's crc32 to
+work properly on inputs 4+GiB in length instead of returning the wrong
+result. The workaround prior to this was to always feed the function
+data in increments smaller than 4GiB or to just call the zlib module
+function.
diff --git a/Modules/binascii.c b/Modules/binascii.c

index 1f3248b6049b31e98328e8962d21b95b33c98e22..3777580a79f2a92d7a94c43fc0e29f0d1ba2bf9a 100644 (file)
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1120,16 +1120,20 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
  /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
  
  #ifdef USE_ZLIB_CRC32
-/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
+/* The same core as zlibmodule.c zlib_crc32_impl. */
  {
-    const Byte *buf;
-    Py_ssize_t len;
-    int signed_val;
-
-    buf = (Byte*)data->buf;
-    len = data->len;
-    signed_val = crc32(crc, buf, len);
-    return (unsigned int)signed_val & 0xffffffffU;
+    unsigned char *buf = data->buf;
+    Py_ssize_t len = data->len;
+
+    /* Avoid truncation of length for very large buffers. crc32() takes
+       length as an unsigned int, which may be narrower than Py_ssize_t. */
+    while ((size_t)len > UINT_MAX) {
+        crc = crc32(crc, buf, UINT_MAX);
+        buf += (size_t) UINT_MAX;
+        len -= (size_t) UINT_MAX;
+    }
+    crc = crc32(crc, buf, (unsigned int)len);
+    return crc & 0xffffffff;
  }
  #else  /* USE_ZLIB_CRC32 */
  { /* By Jim Ahlstrom; All rights transferred to CNRI */
author	Gregory P. Smith <greg@krypto.org>
	Mon, 21 Mar 2022 06:34:45 +0000 (23:34 -0700)
committer	GitHub <noreply@github.com>
	Mon, 21 Mar 2022 06:34:45 +0000 (23:34 -0700)
Lib/test/test_binascii.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst	[new file with mode: 0644]	patch \| blob
Modules/binascii.c		patch \| blob \| blame \| history