When compiled with `USE_ZLIB_CRC32` defined (`configure` sets this on POSIX systems), `binascii.crc32(...)` failed to compute the correct value when the input data was >= 4GiB. Because the zlib crc32 API is limited to a 32-bit length.
This lines it up with the `zlib.crc32(...)` implementation that doesn't have that flaw.
**Performance:** This also adopts the same GIL releasing for larger inputs logic that `zlib.crc32` has, and causes the Windows build to always use zlib's crc32 instead of our slow C code as zlib is a required build dependency on Windows.
import binascii
import array
import re
-from test.support import warnings_helper
+from test.support import bigmemtest, _1G, _4G, warnings_helper
# Note: "*_hex" functions are aliases for "(un)hexlify"
class MemoryviewBinASCIITest(BinASCIITest):
type2test = memoryview
+class ChecksumBigBufferTestCase(unittest.TestCase):
+ """bpo-38256 - check that inputs >=4 GiB are handled correctly."""
+
+ @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
+ def test_big_buffer(self, size):
+ data = b"nyan" * (_1G + 1)
+ self.assertEqual(binascii.crc32(data), 1044521549)
+
if __name__ == "__main__":
unittest.main()
--- /dev/null
+Fix :func:`binascii.crc32` when it is compiled to use zlib'c crc32 to
+work properly on inputs 4+GiB in length instead of returning the wrong
+result. The workaround prior to this was to always feed the function
+data in increments smaller than 4GiB or to just call the zlib module
+function.
+
+We also have :func:`binascii.crc32` release the GIL when computing
+on larger inputs as :func:`zlib.crc32` and :mod:`hashlib` do.
+
+This also boosts performance on Windows as it now uses the zlib crc32
+implementation for :func:`binascii.crc32` for a 2-3x speedup.
+
+That the stdlib has a crc32 API in two modules is a known historical
+oddity. This moves us closer to a single implementation behind them.
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
+
+static unsigned int
+internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
+{ /* By Jim Ahlstrom; All rights transferred to CNRI */
+ unsigned int result;
+
+ crc = ~ crc;
+ while (len-- > 0) {
+ crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
+ /* Note: (crc >> 8) MUST zero fill on left */
+ }
+
+ result = (crc ^ 0xFFFFFFFF);
+ return result & 0xffffffff;
+}
#endif /* USE_ZLIB_CRC32 */
/*[clinic input]
/*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
#ifdef USE_ZLIB_CRC32
-/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
+/* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
+ * modules for historical reasons. */
{
- const Byte *buf;
- Py_ssize_t len;
- int signed_val;
-
- buf = (Byte*)data->buf;
- len = data->len;
- signed_val = crc32(crc, buf, len);
- return (unsigned int)signed_val & 0xffffffffU;
+ /* Releasing the GIL for very small buffers is inefficient
+ and may lower performance */
+ if (data->len > 1024*5) {
+ unsigned char *buf = data->buf;
+ Py_ssize_t len = data->len;
+
+ Py_BEGIN_ALLOW_THREADS
+ /* Avoid truncation of length for very large buffers. crc32() takes
+ length as an unsigned int, which may be narrower than Py_ssize_t. */
+ while ((size_t)len > UINT_MAX) {
+ crc = crc32(crc, buf, UINT_MAX);
+ buf += (size_t) UINT_MAX;
+ len -= (size_t) UINT_MAX;
+ }
+ crc = crc32(crc, buf, (unsigned int)len);
+ Py_END_ALLOW_THREADS
+ } else {
+ crc = crc32(crc, data->buf, (unsigned int)data->len);
+ }
+ return crc & 0xffffffff;
}
#else /* USE_ZLIB_CRC32 */
-{ /* By Jim Ahlstrom; All rights transferred to CNRI */
- const unsigned char *bin_data;
- Py_ssize_t len;
- unsigned int result;
-
- bin_data = data->buf;
- len = data->len;
-
- crc = ~ crc;
- while (len-- > 0) {
- crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
- /* Note: (crc >> 8) MUST zero fill on left */
+{
+ const unsigned char *bin_data = data->buf;
+ Py_ssize_t len = data->len;
+
+ /* Releasing the GIL for very small buffers is inefficient
+ and may lower performance */
+ if (len > 1024*5) {
+ unsigned int result;
+ Py_BEGIN_ALLOW_THREADS
+ result = internal_crc32(bin_data, len, crc);
+ Py_END_ALLOW_THREADS
+ return result;
+ } else {
+ return internal_crc32(bin_data, len, crc);
}
-
- result = (crc ^ 0xFFFFFFFF);
- return result & 0xffffffff;
}
#endif /* USE_ZLIB_CRC32 */
#define ZLIB_CRC32_METHODDEF \
{"crc32", (PyCFunction)(void(*)(void))zlib_crc32, METH_FASTCALL, zlib_crc32__doc__},
-static PyObject *
+static unsigned int
zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value);
static PyObject *
PyObject *return_value = NULL;
Py_buffer data = {NULL, NULL};
unsigned int value = 0;
+ unsigned int _return_value;
if (!_PyArg_CheckPositional("crc32", nargs, 1, 2)) {
goto exit;
goto exit;
}
skip_optional:
- return_value = zlib_crc32_impl(module, &data, value);
+ _return_value = zlib_crc32_impl(module, &data, value);
+ if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = PyLong_FromUnsignedLong((unsigned long)_return_value);
exit:
/* Cleanup for data */
#ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
#define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
#endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
-/*[clinic end generated code: output=e3e8a6142ea045a7 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=f009d194565416d1 input=a9049054013a1b77]*/
}
/*[clinic input]
-zlib.crc32
+zlib.crc32 -> unsigned_int
data: Py_buffer
value: unsigned_int(bitwise=True) = 0
The returned checksum is an integer.
[clinic start generated code]*/
-static PyObject *
+static unsigned int
zlib_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
-/*[clinic end generated code: output=63499fa20af7ea25 input=26c3ed430fa00b4c]*/
+/*[clinic end generated code: output=b217562e4fe6d6a6 input=1229cb2fb5ea948a]*/
{
/* Releasing the GIL for very small buffers is inefficient
and may lower performance */
} else {
value = crc32(value, data->buf, (unsigned int)data->len);
}
- return PyLong_FromUnsignedLong(value & 0xffffffffU);
+ return value;
}
<ClCompile Include="..\Modules\arraymodule.c" />
<ClCompile Include="..\Modules\atexitmodule.c" />
<ClCompile Include="..\Modules\audioop.c" />
- <ClCompile Include="..\Modules\binascii.c" />
+ <ClCompile Include="..\Modules\binascii.c">
+ <PreprocessorDefinitions>USE_ZLIB_CRC32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
<ClCompile Include="..\Modules\cmathmodule.c" />
<ClCompile Include="..\Modules\_datetimemodule.c" />
<ClCompile Include="..\Modules\errnomodule.c" />