gh-128646: Implement GzipFile.readinto[1]() methods (GH-128647)

author Chris Markiewicz <markiewicz@stanford.edu>

Sat, 8 Mar 2025 03:04:45 +0000 (22:04 -0500)

committer GitHub <noreply@github.com>

Sat, 8 Mar 2025 03:04:45 +0000 (21:04 -0600)
author Chris Markiewicz <markiewicz@stanford.edu>
Sat, 8 Mar 2025 03:04:45 +0000 (22:04 -0500)
committer GitHub <noreply@github.com>
Sat, 8 Mar 2025 03:04:45 +0000 (21:04 -0600)
diff --git a/Lib/gzip.py b/Lib/gzip.py

index 7e384f8a568c1cf9cb77a164f2aec86b80977224..d681ef6b488dad76b23422e63da0c1c7342485f4 100644 (file)
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -325,11 +325,15 @@ class GzipFile(_compression.BaseStream):
  
          return length
  
-    def read(self, size=-1):
-        self._check_not_closed()
+    def _check_read(self, caller):
          if self.mode != READ:
              import errno
-            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
+            msg = f"{caller}() on write-only GzipFile object"
+            raise OSError(errno.EBADF, msg)
+
+    def read(self, size=-1):
+        self._check_not_closed()
+        self._check_read("read")
          return self._buffer.read(size)
  
      def read1(self, size=-1):
@@ -337,19 +341,25 @@ class GzipFile(_compression.BaseStream):
  
          Reads up to a buffer's worth of data if size is negative."""
          self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")
+        self._check_read("read1")
  
          if size < 0:
              size = io.DEFAULT_BUFFER_SIZE
          return self._buffer.read1(size)
  
+    def readinto(self, b):
+        self._check_not_closed()
+        self._check_read("readinto")
+        return self._buffer.readinto(b)
+
+    def readinto1(self, b):
+        self._check_not_closed()
+        self._check_read("readinto1")
+        return self._buffer.readinto1(b)
+
      def peek(self, n):
          self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
+        self._check_read("peek")
          return self._buffer.peek(n)
  
      @property
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py

index 0940bb114df625d2473c22ba9ce46b2dc239ab6a..260fae5ae1b3689e738d77e24a88de33eb6ca2a6 100644 (file)
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -143,6 +143,38 @@ class TestGzip(BaseTest):
                  self.assertEqual(f.tell(), nread)
          self.assertEqual(b''.join(blocks), data1 * 50)
  
+    def test_readinto(self):
+        # 10 MiB of incompressible (random) data to ensure multiple reads
+        large_data = os.urandom(10 * 2**20)
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            f.write(large_data)
+
+        buf = bytearray(len(large_data))
+        with gzip.GzipFile(self.filename, 'r') as f:
+            nbytes = f.readinto(buf)
+        self.assertEqual(nbytes, len(large_data))
+        self.assertEqual(buf, large_data)
+
+    def test_readinto1(self):
+        # 10 MiB of incompressible (random) data to ensure multiple reads
+        large_data = os.urandom(10 * 2**20)
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            f.write(large_data)
+
+        nread = 0
+        buf = bytearray(len(large_data))
+        memview = memoryview(buf)  # Simplifies slicing
+        with gzip.GzipFile(self.filename, 'r') as f:
+            for count in range(200):
+                nbytes = f.readinto1(memview[nread:])
+                if not nbytes:
+                    break
+                nread += nbytes
+                self.assertEqual(f.tell(), nread)
+        self.assertEqual(buf, large_data)
+        # readinto1() should require multiple loops
+        self.assertGreater(count, 1)
+
      @bigmemtest(size=_4G, memuse=1)
      def test_read_large(self, size):
          # Read chunk size over UINT_MAX should be supported, despite zlib's
diff --git a/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst

new file mode 100644 (file)

index 0000000..034a66b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst
@@ -0,0 +1,4 @@
+Eagerly write to buffers passed to :class:`gzip.GzipFile`'s
+:meth:`~io.BufferedIOBase.readinto` and
+:meth:`~io.BufferedIOBase.readinto1` implementations,
+avoiding unnecessary allocations. Patch by Chris Markiewicz.
author	Chris Markiewicz <markiewicz@stanford.edu>
	Sat, 8 Mar 2025 03:04:45 +0000 (22:04 -0500)
committer	GitHub <noreply@github.com>
	Sat, 8 Mar 2025 03:04:45 +0000 (21:04 -0600)
Lib/gzip.py		patch \| blob \| blame \| history
Lib/test/test_gzip.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst	[new file with mode: 0644]	patch \| blob