From: Emma Smith Date: Tue, 8 Apr 2025 07:43:14 +0000 (-0700) Subject: gh-84481: Make ZipFile.data_offset more robust (#132178) X-Git-Tag: v3.14.0a7~10 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6cd1d6c6b142697fb72f422b7b448c27ebc30534;p=thirdparty%2FPython%2Fcpython.git gh-84481: Make ZipFile.data_offset more robust (#132178) --- diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 94c0a44f3758..2a4e1acf2195 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3348,6 +3348,12 @@ class TestDataOffsetZipWrite(unittest.TestCase): with zipfile.ZipFile(fp, "w") as zipfp: self.assertEqual(zipfp.data_offset, 16) + def test_data_offset_append_with_bad_zip(self): + with io.BytesIO() as fp: + fp.write(b"this is a prefix") + with zipfile.ZipFile(fp, "a") as zipfp: + self.assertEqual(zipfp.data_offset, 16) + def test_data_offset_write_no_tell(self): # The initializer in ZipFile checks if tell raises AttributeError or # OSError when creating a file in write mode when deducing the offset @@ -3357,7 +3363,7 @@ class TestDataOffsetZipWrite(unittest.TestCase): raise OSError("Unimplemented!") with NoTellBytesIO() as fp: with zipfile.ZipFile(fp, "w") as zipfp: - self.assertIs(zipfp.data_offset, None) + self.assertIsNone(zipfp.data_offset) class EncodedMetadataTests(unittest.TestCase): diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index b061691ac6f8..e3a94215bd67 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -1403,6 +1403,7 @@ class ZipFile: self._lock = threading.RLock() self._seekable = True self._writing = False + self._data_offset = None try: if mode == 'r': @@ -1418,7 +1419,6 @@ class ZipFile: self.fp = _Tellable(self.fp) self.start_dir = 0 self._seekable = False - self._data_offset = None else: # Some file-like objects can provide tell() but not seek() try: @@ -1439,6 +1439,7 @@ class ZipFile: # even if no files are added to the archive self._didModify = True self.start_dir = self.fp.tell() + self._data_offset = self.start_dir else: raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") except: