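Stop using len() to compute the size of the input data in write(). For buffer-protocol objects whose items are wider than one byte (for example array.array('Q')), len() returns the number of items, not the number of bytes; use memoryview(data).nbytes for such objects instead.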
Co-authored-by: Marco Ribeiro <marcoffee@users.noreply.github.com>
"""Write a byte string to the file.
Returns the number of uncompressed bytes written, which is
- always len(data). Note that due to buffering, the file on disk
- may not reflect the data written until close() is called.
+ always the length of data in bytes. Note that due to buffering,
+ the file on disk may not reflect the data written until close()
+ is called.
"""
with self._lock:
self._check_can_write()
+ if isinstance(data, (bytes, bytearray)):
+ length = len(data)
+ else:
+ # accept any data that supports the buffer protocol
+ data = memoryview(data)
+ length = data.nbytes
+
compressed = self._compressor.compress(data)
self._fp.write(compressed)
- self._pos += len(data)
- return len(data)
+ self._pos += length
+ return length
def writelines(self, seq):
"""Write a sequence of byte strings to the file.
"""Write a bytes object to the file.
Returns the number of uncompressed bytes written, which is
- always len(data). Note that due to buffering, the file on disk
- may not reflect the data written until close() is called.
+ always the length of data in bytes. Note that due to buffering,
+ the file on disk may not reflect the data written until close()
+ is called.
"""
self._check_can_write()
+ if isinstance(data, (bytes, bytearray)):
+ length = len(data)
+ else:
+ # accept any data that supports the buffer protocol
+ data = memoryview(data)
+ length = data.nbytes
+
compressed = self._compressor.compress(data)
self._fp.write(compressed)
- self._pos += len(data)
- return len(data)
+ self._pos += length
+ return length
def seek(self, offset, whence=io.SEEK_SET):
"""Change the file position.
from test import support
from test.support import bigmemtest, _4G
+import array
import unittest
from io import BytesIO, DEFAULT_BUFFER_SIZE
import os
with BZ2File(BytesIO(truncated[:i])) as f:
self.assertRaises(EOFError, f.read, 1)
+ def test_issue44439(self):
+ q = array.array('Q', [1, 2, 3, 4, 5])
+ LENGTH = len(q) * q.itemsize
+
+ with BZ2File(BytesIO(), 'w') as f:
+ self.assertEqual(f.write(q), LENGTH)
+ self.assertEqual(f.tell(), LENGTH)
+
class BZ2CompressorTest(BaseTest):
def testCompress(self):
import _compression
+import array
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
import os
import pathlib
self.assertTrue(d2.eof)
self.assertEqual(out1 + out2, entire)
+ def test_issue44439(self):
+ q = array.array('Q', [1, 2, 3, 4, 5])
+ LENGTH = len(q) * q.itemsize
+
+ with LZMAFile(BytesIO(), 'w') as f:
+ self.assertEqual(f.write(q), LENGTH)
+ self.assertEqual(f.tell(), LENGTH)
+
class OpenTestCase(unittest.TestCase):
--- /dev/null
+Fix :meth:`bz2.BZ2File.write` and :meth:`lzma.LZMAFile.write`: when the input
+data is an object that supports the buffer protocol, the returned length and
+the recorded file position could be wrong.