git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-41316: Make tarfile follow specs for FNAME (GH-21511)
author: Miss Skeleton (bot) <31488909+miss-islington@users.noreply.github.com>
Wed, 21 Oct 2020 05:29:00 +0000 (22:29 -0700)
committer: GitHub <noreply@github.com>
Wed, 21 Oct 2020 05:29:00 +0000 (22:29 -0700)
tarfile writes the full path to the FNAME field of the GZIP header, instead of just the basename, when the user specifies an absolute path. Some archive viewers may process such a file incorrectly. It is also a security issue, because anyone who receives the archive can learn the directory structure of the originating system, including the username or other personal information.

RFC1952 says about FNAME:
This is the original name of the file being compressed, with any directory components removed.

So tarfile must strip the directory components from FNAME and write only the basename of the file.

Automerge-Triggered-By: @jaraco
(cherry picked from commit 22748a83d927d3da1beaed771be30887c42b2500)

Co-authored-by: Artem Bulgakov <ArtemSBulgakov@ya.ru>
Lib/tarfile.py
Lib/test/test_tarfile.py
Misc/ACKS
Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst [new file with mode: 0644]

index 62a6d9d120ffa249ddb7648947d9169c11c3f1a3..e42279470dac652ed40a9a0dcfde6c6013586013 100755 (executable)
@@ -420,6 +420,8 @@ class _Stream:
         self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
         if self.name.endswith(".gz"):
             self.name = self.name[:-3]
+        # Honor "directory components removed" from RFC1952
+        self.name = os.path.basename(self.name)
         # RFC1952 says we must use ISO-8859-1 for the FNAME field.
         self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
 
index 3ddeb97f5268fe8ba1bd7c95b579e681d2243010..1e5186a90b37f9a342b3c55400ace446a2ea7b7b 100644 (file)
@@ -1416,12 +1416,15 @@ class WriteTest(WriteTestBase, unittest.TestCase):
                                    pax_headers={'non': 'empty'})
             self.assertFalse(f.closed)
 
+
 class GzipWriteTest(GzipTest, WriteTest):
     pass
 
+
 class Bz2WriteTest(Bz2Test, WriteTest):
     pass
 
+
 class LzmaWriteTest(LzmaTest, WriteTest):
     pass
 
@@ -1464,8 +1467,17 @@ class StreamWriteTest(WriteTestBase, unittest.TestCase):
         finally:
             os.umask(original_umask)
 
+
 class GzipStreamWriteTest(GzipTest, StreamWriteTest):
-    pass
+    def test_source_directory_not_leaked(self):
+        """
+        Ensure the source directory is not included in the tar header
+        per bpo-41316.
+        """
+        tarfile.open(tmpname, self.mode).close()
+        payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
+        assert os.path.dirname(tmpname) not in payload
+
 
 class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
     decompressor = bz2.BZ2Decompressor if bz2 else None
index 021df26426e3610ad758e07181e6097886c166c0..f06fad7926d4618d8f61966ae1b65124e7cc256f 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -243,6 +243,7 @@ Colm Buckley
 Erik de Bueger
 Jan-Hein Bührman
 Lars Buitinck
+Artem Bulgakov
 Dick Bulterman
 Bill Bumgarner
 Jimmy Burgett
diff --git a/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst b/Misc/NEWS.d/next/Library/2020-07-28-12-08-58.bpo-41316.bSCbK4.rst
new file mode 100644 (file)
index 0000000..139a170
--- /dev/null
@@ -0,0 +1 @@
+Fix the :mod:`tarfile` module to write only the basename of the TAR file to the GZIP compression header.
\ No newline at end of file