git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-129005: Align FileIO.readall() allocation (#129458)
author: Cody Maloney <cmaloney@users.noreply.github.com>
Thu, 30 Jan 2025 11:14:23 +0000 (03:14 -0800)
committer: GitHub <noreply@github.com>
Thu, 30 Jan 2025 11:14:23 +0000 (11:14 +0000)
Both `_pyio.FileIO.readall()` and `_io.FileIO.readall()` now use a
pre-allocated buffer of length `bufsize`, fill it using a readinto(),
and have matching "expand buffer" logic.

On my machine this takes:

`./python -m test -M8g -uall test_largefile -m test_large_read -v`

from ~3.7 seconds to ~3.4 seconds.

Lib/_pyio.py
Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst [new file with mode: 0644]

index 023478aa78c6a04b2320f3a360fe1be9e020bf6d..76a27910da4d5f375508881b565373dc523e9bfe 100644 (file)
@@ -1674,22 +1674,31 @@ class FileIO(RawIOBase):
                 except OSError:
                     pass
 
-        result = bytearray()
+        result = bytearray(bufsize)
+        bytes_read = 0
         while True:
-            if len(result) >= bufsize:
-                bufsize = len(result)
-                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
-            n = bufsize - len(result)
+            if bytes_read >= bufsize:
+                # Parallels _io/fileio.c new_buffersize
+                if bufsize > 65536:
+                    addend = bufsize >> 3
+                else:
+                    addend = bufsize + 256
+                if addend < DEFAULT_BUFFER_SIZE:
+                    addend = DEFAULT_BUFFER_SIZE
+                bufsize += addend
+                result[bytes_read:bufsize] = b'\0'
+            assert bufsize - bytes_read > 0, "Should always try and read at least one byte"
             try:
-                chunk = os.read(self._fd, n)
+                n = os.readinto(self._fd, memoryview(result)[bytes_read:])
             except BlockingIOError:
-                if result:
+                if bytes_read > 0:
                     break
                 return None
-            if not chunk: # reached the end of the file
+            if n == 0:  # reached the end of the file
                 break
-            result += chunk
+            bytes_read += n
 
+        del result[bytes_read:]
         return bytes(result)
 
     def readinto(self, buffer):
diff --git a/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst b/Misc/NEWS.d/next/Library/2025-01-28-21-22-44.gh-issue-129005.h57i9j.rst
new file mode 100644 (file)
index 0000000..c76fb05
--- /dev/null
@@ -0,0 +1,2 @@
+``_pyio.FileIO.readall()`` now allocates, resizes, and fills a data buffer using
+the same algorithm ``_io.FileIO.readall()`` uses.