git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-88339: enable fast seeking of uncompressed unencrypted zipfile.ZipExtFile (GH...
author: JuniorJPDJ <github.com@juniorjpdj.pl>
Sat, 6 Aug 2022 23:21:23 +0000 (01:21 +0200)
committer: GitHub <noreply@github.com>
Sat, 6 Aug 2022 23:21:23 +0000 (16:21 -0700)
Avoid reading all of the intermediate data in uncompressed items in a zip file when the user seeks forward.

Contributed by: @JuniorJPDJ

Lib/test/test_zipfile.py
Lib/zipfile.py
Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst [new file with mode: 0644]

index fa0ca5aa7428ace787e11f99741f85e8209a5db6..21257785159b37a05043d19eaea8e0fa2968a81d 100644 (file)
@@ -2032,6 +2032,7 @@ class OtherTests(unittest.TestCase):
                 fp.seek(bloc, os.SEEK_CUR)
                 self.assertEqual(fp.tell(), bloc)
                 self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+                self.assertEqual(fp.tell(), bloc + 5)
                 fp.seek(0, os.SEEK_END)
                 self.assertEqual(fp.tell(), len(txt))
                 fp.seek(0, os.SEEK_SET)
@@ -2049,6 +2050,7 @@ class OtherTests(unittest.TestCase):
                 fp.seek(bloc, os.SEEK_CUR)
                 self.assertEqual(fp.tell(), bloc)
                 self.assertEqual(fp.read(5), txt[bloc:bloc+5])
+                self.assertEqual(fp.tell(), bloc + 5)
                 fp.seek(0, os.SEEK_END)
                 self.assertEqual(fp.tell(), len(txt))
                 fp.seek(0, os.SEEK_SET)
index e3b7a61a6399be129c22c89e6242560a025f8ad5..981560082cab7af572dc71c2e853a88b329f7244 100644 (file)
@@ -847,6 +847,7 @@ class ZipExtFile(io.BufferedIOBase):
                 self._orig_compress_size = zipinfo.compress_size
                 self._orig_file_size = zipinfo.file_size
                 self._orig_start_crc = self._running_crc
+                self._orig_crc = self._expected_crc
                 self._seekable = True
         except AttributeError:
             pass
@@ -1069,17 +1070,17 @@ class ZipExtFile(io.BufferedIOBase):
             raise ValueError("I/O operation on closed file.")
         return self._seekable
 
-    def seek(self, offset, whence=0):
+    def seek(self, offset, whence=os.SEEK_SET):
         if self.closed:
             raise ValueError("seek on closed file.")
         if not self._seekable:
             raise io.UnsupportedOperation("underlying stream is not seekable")
         curr_pos = self.tell()
-        if whence == 0: # Seek from start of file
+        if whence == os.SEEK_SET:
             new_pos = offset
-        elif whence == 1: # Seek from current position
+        elif whence == os.SEEK_CUR:
             new_pos = curr_pos + offset
-        elif whence == 2: # Seek from EOF
+        elif whence == os.SEEK_END:
             new_pos = self._orig_file_size + offset
         else:
             raise ValueError("whence must be os.SEEK_SET (0), "
@@ -1094,7 +1095,19 @@ class ZipExtFile(io.BufferedIOBase):
         read_offset = new_pos - curr_pos
         buff_offset = read_offset + self._offset
 
-        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
+        # Fast seek uncompressed unencrypted file
+        if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
+            # disable CRC checking after first seeking - it would be invalid
+            self._expected_crc = None
+            # seek actual file taking already buffered data into account
+            read_offset -= len(self._readbuffer) - self._offset
+            self._fileobj.seek(read_offset, os.SEEK_CUR)
+            self._left -= read_offset
+            read_offset = 0
+            # flush read buffer
+            self._readbuffer = b''
+            self._offset = 0
+        elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
             # Just move the _offset index if the new position is in the _readbuffer
             self._offset = buff_offset
             read_offset = 0
@@ -1102,6 +1115,7 @@ class ZipExtFile(io.BufferedIOBase):
             # Position is before the current position. Reset the ZipExtFile
             self._fileobj.seek(self._orig_compress_start)
             self._running_crc = self._orig_start_crc
+            self._expected_crc = self._orig_crc
             self._compress_left = self._orig_compress_size
             self._left = self._orig_file_size
             self._readbuffer = b''
diff --git a/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst b/Misc/NEWS.d/next/Library/2021-08-27-18-07-35.bpo-44173.oW92Ev.rst
new file mode 100644 (file)
index 0000000..abc9826
--- /dev/null
@@ -0,0 +1 @@
+Enable fast seeking of uncompressed unencrypted :class:`zipfile.ZipExtFile`