]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-57911: Sanitize symlink targets in tarfile on win32 (GH-138309)
authorChristoph Walcher <christoph-wa@gmx.de>
Fri, 5 Sep 2025 14:19:47 +0000 (16:19 +0200)
committerGitHub <noreply@github.com>
Fri, 5 Sep 2025 14:19:47 +0000 (16:19 +0200)
Doc/whatsnew/3.15.rst
Lib/tarfile.py
Lib/test/test_tarfile.py
Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst [new file with mode: 0644]

index 932bb100cbee2316cf28acd1ef6cb7e0aac22529..61203686326c392e32dfe24382062778d1797dbd 100644 (file)
@@ -463,6 +463,10 @@ tarfile
   :func:`~tarfile.TarFile.errorlevel` is zero.
   (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887`
   and :cve:`2025-4435`.)
+* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall`
+  now replace slashes by backslashes in symlink targets on Windows to prevent
+  creation of corrupted links.
+  (Contributed by Christoph Walcher in :gh:`57911`.)
 
 
 types
index 7aa4a032b63e6bbc01d310a7a5e7d8a1ce04c74e..7db3a40c9b33cf83a23cdd12dfee84e000a72959 100644 (file)
@@ -2718,7 +2718,13 @@ class TarFile(object):
                 if os.path.lexists(targetpath):
                     # Avoid FileExistsError on following os.symlink.
                     os.unlink(targetpath)
-                os.symlink(tarinfo.linkname, targetpath)
+                link_target = tarinfo.linkname
+                if os.name == "nt":
+                    # gh-57911: Posix-flavoured forward-slash path separators in
+                    # symlink targets aren't acknowledged by Windows, resulting
+                    # in corrupted links.
+                    link_target = link_target.replace("/", os.path.sep)
+                os.symlink(link_target, targetpath)
                 return
             else:
                 if os.path.exists(tarinfo._link_target):
index 860413b88eb6b51960f0a45fae21e6ebf853b1d0..e5466c3bf2a5e84131e960ba84f1ae374a373b61 100644 (file)
@@ -2777,7 +2777,7 @@ class MiscTest(unittest.TestCase):
             str(excinfo.exception),
         )
 
-    @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support')
+    @os_helper.skip_unless_symlink
     @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod")
     @unittest.mock.patch('os.chmod')
     def test_deferred_directory_attributes_update(self, mock_chmod):
@@ -3663,6 +3663,39 @@ class TestExtractionFilters(unittest.TestCase):
     # The destination for the extraction, within `outerdir`
     destdir = outerdir / 'dest'
 
+    @classmethod
+    def setUpClass(cls):
+        # Posix and Windows have different pathname resolution:
+        # either symlink or a '..' component resolve first.
+        # Let's see which we are on.
+        if os_helper.can_symlink():
+            testpath = os.path.join(TEMPDIR, 'resolution_test')
+            os.mkdir(testpath)
+
+            # testpath/current links to `.` which is all of:
+            #   - `testpath`
+            #   - `testpath/current`
+            #   - `testpath/current/current`
+            #   - etc.
+            os.symlink('.', os.path.join(testpath, 'current'))
+
+            # we'll test where `testpath/current/../file` ends up
+            with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
+                pass
+
+            if os.path.exists(os.path.join(testpath, 'file')):
+                # Windows collapses 'current\..' to '.' first, leaving
+                # 'testpath\file'
+                cls.dotdot_resolves_early = True
+            elif os.path.exists(os.path.join(testpath, '..', 'file')):
+                # Posix resolves 'current' to '.' first, leaving
+                # 'testpath/../file'
+                cls.dotdot_resolves_early = False
+            else:
+                raise AssertionError('Could not determine link resolution')
+        else:
+            cls.dotdot_resolves_early = True
+
     @contextmanager
     def check_context(self, tar, filter, *, check_flag=True):
         """Extracts `tar` to `self.destdir` and allows checking the result
@@ -3809,23 +3842,21 @@ class TestExtractionFilters(unittest.TestCase):
             arc.add('current', symlink_to='.')
 
             # effectively points to ./../
-            arc.add('parent', symlink_to='current/..')
+            if self.dotdot_resolves_early:
+                arc.add('parent', symlink_to='current/../..')
+            else:
+                arc.add('parent', symlink_to='current/..')
 
             arc.add('parent/evil')
 
         if os_helper.can_symlink():
             with self.check_context(arc.open(), 'fully_trusted'):
-                if self.raised_exception is not None:
-                    # Windows will refuse to create a file that's a symlink to itself
-                    # (and tarfile doesn't swallow that exception)
-                    self.expect_exception(FileExistsError)
-                    # The other cases will fail with this error too.
-                    # Skip the rest of this test.
-                    return
+                self.expect_file('current', symlink_to='.')
+                if self.dotdot_resolves_early:
+                    self.expect_file('parent', symlink_to='current/../..')
                 else:
-                    self.expect_file('current', symlink_to='.')
                     self.expect_file('parent', symlink_to='current/..')
-                    self.expect_file('../evil')
+                self.expect_file('../evil')
 
             with self.check_context(arc.open(), 'tar'):
                 self.expect_exception(
@@ -3927,35 +3958,6 @@ class TestExtractionFilters(unittest.TestCase):
         # Test interplaying symlinks
         # Inspired by 'dirsymlink2b' in jwilk/traversal-archives
 
-        # Posix and Windows have different pathname resolution:
-        # either symlink or a '..' component resolve first.
-        # Let's see which we are on.
-        if os_helper.can_symlink():
-            testpath = os.path.join(TEMPDIR, 'resolution_test')
-            os.mkdir(testpath)
-
-            # testpath/current links to `.` which is all of:
-            #   - `testpath`
-            #   - `testpath/current`
-            #   - `testpath/current/current`
-            #   - etc.
-            os.symlink('.', os.path.join(testpath, 'current'))
-
-            # we'll test where `testpath/current/../file` ends up
-            with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
-                pass
-
-            if os.path.exists(os.path.join(testpath, 'file')):
-                # Windows collapses 'current\..' to '.' first, leaving
-                # 'testpath\file'
-                dotdot_resolves_early = True
-            elif os.path.exists(os.path.join(testpath, '..', 'file')):
-                # Posix resolves 'current' to '.' first, leaving
-                # 'testpath/../file'
-                dotdot_resolves_early = False
-            else:
-                raise AssertionError('Could not determine link resolution')
-
         with ArchiveMaker() as arc:
 
             # `current` links to `.` which is both the destination directory
@@ -3991,7 +3993,7 @@ class TestExtractionFilters(unittest.TestCase):
 
         with self.check_context(arc.open(), 'data'):
             if os_helper.can_symlink():
-                if dotdot_resolves_early:
+                if self.dotdot_resolves_early:
                     # Fail when extracting a file outside destination
                     self.expect_exception(
                             tarfile.OutsideDestinationError,
@@ -4039,6 +4041,21 @@ class TestExtractionFilters(unittest.TestCase):
                 tarfile.AbsoluteLinkError,
                 "'parent' is a link to an absolute path")
 
+    @symlink_test
+    @os_helper.skip_unless_symlink
+    def test_symlink_target_seperator_rewrite_on_windows(self):
+        with ArchiveMaker() as arc:
+            arc.add('link', symlink_to="relative/test/path")
+
+        with self.check_context(arc.open(), 'fully_trusted'):
+            self.expect_file('link', type=tarfile.SYMTYPE)
+            link_path = os.path.normpath(self.destdir / "link")
+            link_target = os.readlink(link_path)
+            if os.name == "nt":
+                self.assertEqual(link_target, "relative\\test\\path")
+            else:
+                self.assertEqual(link_target, "relative/test/path")
+
     def test_absolute_hardlink(self):
         # Test hardlink to an absolute path
         # Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
diff --git a/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst b/Misc/NEWS.d/next/Library/2025-08-31-22-10-22.gh-issue-57911.N_Ixtv.rst
new file mode 100644 (file)
index 0000000..b0d306c
--- /dev/null
@@ -0,0 +1,2 @@
+When extracting tar files on Windows, slashes in symlink targets will be
+replaced by backslashes to prevent corrupted links.