git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-130577: tarfile now validates archives to ensure member offsets are non...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 28 Jul 2025 15:59:33 +0000 (17:59 +0200)
committer: GitHub <noreply@github.com>
Mon, 28 Jul 2025 15:59:33 +0000 (15:59 +0000)
gh-130577: tarfile now validates archives to ensure member offsets are non-negative (GH-137027)
(cherry picked from commit 7040aa54f14676938970e10c5f74ea93cd56aa38)

Co-authored-by: Alexander Urieles <aeurielesn@users.noreply.github.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Lib/tarfile.py
Lib/test/test_tarfile.py
Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst [new file with mode: 0644]

index 0980f6a81759ce781659ed832c67d7f539fc9f26..9ff9df696de68ca0582ea214ac56fba08ec8fa4f 100755 (executable)
@@ -1636,6 +1636,9 @@ class TarInfo(object):
         """Round up a byte count by BLOCKSIZE and return it,
            e.g. _block(834) => 1024.
         """
+        # Only non-negative offsets are allowed
+        if count < 0:
+            raise InvalidHeaderError("invalid offset")
         blocks, remainder = divmod(count, BLOCKSIZE)
         if remainder:
             blocks += 1
index ac31be0f05062cde2008055f94ad04f2deccef6d..7024be46de59c9d10ffa696ed1510d9b3cac5b4e 100644 (file)
@@ -50,6 +50,7 @@ bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")
 xzname = os.path.join(TEMPDIR, "testtar.tar.xz")
 tmpname = os.path.join(TEMPDIR, "tmp.tar")
 dotlessname = os.path.join(TEMPDIR, "testtar")
+SPACE = b" "
 
 sha256_regtype = (
     "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"
@@ -4578,6 +4579,161 @@ class OverwriteTests(archiver_tests.OverwriteTests, unittest.TestCase):
         ar.extractall(self.testdir, filter='fully_trusted')
 
 
+class OffsetValidationTests(unittest.TestCase):
+    tarname = tmpname
+    invalid_posix_header = (
+        # name: 100 bytes
+        tarfile.NUL * tarfile.LENGTH_NAME
+        # mode, space, null terminator: 8 bytes
+        + b"000755" + SPACE + tarfile.NUL
+        # uid, space, null terminator: 8 bytes
+        + b"000001" + SPACE + tarfile.NUL
+        # gid, space, null terminator: 8 bytes
+        + b"000001" + SPACE + tarfile.NUL
+        # size, space: 12 bytes
+        + b"\xff" * 11 + SPACE
+        # mtime, space: 12 bytes
+        + tarfile.NUL * 11 + SPACE
+        # chksum: 8 bytes
+        + b"0011407" + tarfile.NUL
+        # type: 1 byte
+        + tarfile.REGTYPE
+        # linkname: 100 bytes
+        + tarfile.NUL * tarfile.LENGTH_LINK
+        # magic: 6 bytes, version: 2 bytes
+        + tarfile.POSIX_MAGIC
+        # uname: 32 bytes
+        + tarfile.NUL * 32
+        # gname: 32 bytes
+        + tarfile.NUL * 32
+        # devmajor, space, null terminator: 8 bytes
+        + tarfile.NUL * 6 + SPACE + tarfile.NUL
+        # devminor, space, null terminator: 8 bytes
+        + tarfile.NUL * 6 + SPACE + tarfile.NUL
+        # prefix: 155 bytes
+        + tarfile.NUL * tarfile.LENGTH_PREFIX
+        # padding: 12 bytes
+        + tarfile.NUL * 12
+    )
+    invalid_gnu_header = (
+        # name: 100 bytes
+        tarfile.NUL * tarfile.LENGTH_NAME
+        # mode, null terminator: 8 bytes
+        + b"0000755" + tarfile.NUL
+        # uid, null terminator: 8 bytes
+        + b"0000001" + tarfile.NUL
+        # gid, null terminator: 8 bytes
+        + b"0000001" + tarfile.NUL
+        # size, space: 12 bytes
+        + b"\xff" * 11 + SPACE
+        # mtime, space: 12 bytes
+        + tarfile.NUL * 11 + SPACE
+        # chksum: 8 bytes
+        + b"0011327" + tarfile.NUL
+        # type: 1 byte
+        + tarfile.REGTYPE
+        # linkname: 100 bytes
+        + tarfile.NUL * tarfile.LENGTH_LINK
+        # magic: 8 bytes
+        + tarfile.GNU_MAGIC
+        # uname: 32 bytes
+        + tarfile.NUL * 32
+        # gname: 32 bytes
+        + tarfile.NUL * 32
+        # devmajor, null terminator: 8 bytes
+        + tarfile.NUL * 8
+        # devminor, null terminator: 8 bytes
+        + tarfile.NUL * 8
+        # padding: 167 bytes
+        + tarfile.NUL * 167
+    )
+    invalid_v7_header = (
+        # name: 100 bytes
+        tarfile.NUL * tarfile.LENGTH_NAME
+        # mode, space, null terminator: 8 bytes
+        + b"000755" + SPACE + tarfile.NUL
+        # uid, space, null terminator: 8 bytes
+        + b"000001" + SPACE + tarfile.NUL
+        # gid, space, null terminator: 8 bytes
+        + b"000001" + SPACE + tarfile.NUL
+        # size, space: 12 bytes
+        + b"\xff" * 11 + SPACE
+        # mtime, space: 12 bytes
+        + tarfile.NUL * 11 + SPACE
+        # chksum: 8 bytes
+        + b"0010070" + tarfile.NUL
+        # type: 1 byte
+        + tarfile.REGTYPE
+        # linkname: 100 bytes
+        + tarfile.NUL * tarfile.LENGTH_LINK
+        # padding: 255 bytes
+        + tarfile.NUL * 255
+    )
+    valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT)
+    data_block = b"\xff" * tarfile.BLOCKSIZE
+
+    def _write_buffer(self, buffer):
+        with open(self.tarname, "wb") as f:
+            f.write(buffer)
+
+    def _get_members(self, ignore_zeros=None):
+        with open(self.tarname, "rb") as f:
+            with tarfile.open(
+                mode="r", fileobj=f, ignore_zeros=ignore_zeros
+            ) as tar:
+                return tar.getmembers()
+
+    def _assert_raises_read_error_exception(self):
+        with self.assertRaisesRegex(
+            tarfile.ReadError, "file could not be opened successfully"
+        ):
+            self._get_members()
+
+    def test_invalid_offset_header_validations(self):
+        for tar_format, invalid_header in (
+            ("posix", self.invalid_posix_header),
+            ("gnu", self.invalid_gnu_header),
+            ("v7", self.invalid_v7_header),
+        ):
+            with self.subTest(format=tar_format):
+                self._write_buffer(invalid_header)
+                self._assert_raises_read_error_exception()
+
+    def test_early_stop_at_invalid_offset_header(self):
+        buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header
+        self._write_buffer(buffer)
+        members = self._get_members()
+        self.assertEqual(len(members), 1)
+        self.assertEqual(members[0].name, "filename")
+        self.assertEqual(members[0].offset, 0)
+
+    def test_ignore_invalid_archive(self):
+        # 3 invalid headers with their respective data
+        buffer = (self.invalid_gnu_header + self.data_block) * 3
+        self._write_buffer(buffer)
+        members = self._get_members(ignore_zeros=True)
+        self.assertEqual(len(members), 0)
+
+    def test_ignore_invalid_offset_headers(self):
+        for first_block, second_block, expected_offset in (
+            (
+                (self.valid_gnu_header),
+                (self.invalid_gnu_header + self.data_block),
+                0,
+            ),
+            (
+                (self.invalid_gnu_header + self.data_block),
+                (self.valid_gnu_header),
+                1024,
+            ),
+        ):
+            self._write_buffer(first_block + second_block)
+            members = self._get_members(ignore_zeros=True)
+            self.assertEqual(len(members), 1)
+            self.assertEqual(members[0].name, "filename")
+            self.assertEqual(members[0].offset, expected_offset)
+
+
 def setUpModule():
     os_helper.unlink(TEMPDIR)
     os.makedirs(TEMPDIR)
diff --git a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst
new file mode 100644 (file)
index 0000000..342cabb
--- /dev/null
@@ -0,0 +1,3 @@
+:mod:`tarfile` now validates archives to ensure member offsets are
+non-negative.  (Contributed by Alexander Enrique Urieles Nieto in
+:gh:`130577`.)