[3.9] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (...

author Jason R. Coombs <jaraco@jaraco.com>

Wed, 4 Sep 2024 15:46:48 +0000 (11:46 -0400)

committer GitHub <noreply@github.com>

Wed, 4 Sep 2024 15:46:48 +0000 (17:46 +0200)
author Jason R. Coombs <jaraco@jaraco.com>
Wed, 4 Sep 2024 15:46:48 +0000 (11:46 -0400)
committer GitHub <noreply@github.com>
Wed, 4 Sep 2024 15:46:48 +0000 (17:46 +0200)
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py

index 17e95eb86239a5eb1b7b003bd2b6bd9e6a7dd877..5809b2c00060a60a2c9ca287e08f1a58c36fe545 100644 (file)
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -3054,6 +3054,83 @@ class TestPath(unittest.TestCase):
          data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
          zipfile.CompleteDirs._implied_dirs(data)
  
+    def test_malformed_paths(self):
+        """
+        Path should handle malformed paths gracefully.
+
+        Paths with leading slashes are not visible.
+
+        Paths with dots are treated like regular files.
+        """
+        data = io.BytesIO()
+        zf = zipfile.ZipFile(data, "w")
+        zf.writestr("/one-slash.txt", b"content")
+        zf.writestr("//two-slash.txt", b"content")
+        zf.writestr("../parent.txt", b"content")
+        zf.filename = ''
+        root = zipfile.Path(zf)
+        assert list(map(str, root.iterdir())) == ['../']
+        assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
+
+    def test_unsupported_names(self):
+        """
+        Path segments with special characters are readable.
+
+        On some platforms or file systems, characters like
+        ``:`` and ``?`` are not allowed, but they are valid
+        in the zip file.
+        """
+        data = io.BytesIO()
+        zf = zipfile.ZipFile(data, "w")
+        zf.writestr("path?", b"content")
+        zf.writestr("V: NMS.flac", b"fLaC...")
+        zf.filename = ''
+        root = zipfile.Path(zf)
+        contents = root.iterdir()
+        assert next(contents).name == 'path?'
+        assert next(contents).name == 'V: NMS.flac'
+        assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
+
+    def test_backslash_not_separator(self):
+        """
+        In a zip file, backslashes are not separators.
+        """
+        data = io.BytesIO()
+        zf = zipfile.ZipFile(data, "w")
+        zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
+        zf.filename = ''
+        root = zipfile.Path(zf)
+        (first,) = root.iterdir()
+        assert not first.is_dir()
+        assert first.name == 'foo\\bar'
+
+
+class DirtyZipInfo(zipfile.ZipInfo):
+    """
+    Bypass name sanitization.
+    """
+
+    def __init__(self, filename, *args, **kwargs):
+        super().__init__(filename, *args, **kwargs)
+        self.filename = filename
+
+    @classmethod
+    def for_name(cls, name, archive):
+        """
+        Construct the same way that ZipFile.writestr does.
+
+        TODO: extract this functionality and re-use
+        """
+        self = cls(filename=name, date_time=time.localtime(time.time())[:6])
+        self.compress_type = archive.compression
+        self.compress_level = archive.compresslevel
+        if self.filename.endswith('/'):  # pragma: no cover
+            self.external_attr = 0o40775 << 16  # drwxrwxr-x
+            self.external_attr |= 0x10  # MS-DOS directory flag
+        else:
+            self.external_attr = 0o600 << 16  # ?rw-------
+        return self
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py

index 95f95ee112667a23ccd5989977754058dffc8bd2..68d643ddbdd8e6ab00a224be0ac5b8afb3dff6a8 100644 (file)
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -2146,7 +2146,7 @@ def _parents(path):
  def _ancestry(path):
      """
      Given a path with elements separated by
-    posixpath.sep, generate all elements of that path
+    posixpath.sep, generate all elements of that path.
  
      >>> list(_ancestry('b/d'))
      ['b/d', 'b']
@@ -2158,9 +2158,14 @@ def _ancestry(path):
      ['b']
      >>> list(_ancestry(''))
      []
+
+    Multiple separators are treated like a single separator.
+
+    >>> list(_ancestry('//b//d///f//'))
+    ['//b//d///f', '//b//d', '//b']
      """
      path = path.rstrip(posixpath.sep)
-    while path and path != posixpath.sep:
+    while path.rstrip(posixpath.sep):
          yield path
          path, tail = posixpath.split(path)
  
diff --git a/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst

new file mode 100644 (file)

index 0000000..ee9fde6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
@@ -0,0 +1,3 @@
+Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
+causing infinite loops (gh-122905) without breaking contents using
+legitimate characters.
author	Jason R. Coombs <jaraco@jaraco.com>
	Wed, 4 Sep 2024 15:46:48 +0000 (11:46 -0400)
committer	GitHub <noreply@github.com>
	Wed, 4 Sep 2024 15:46:48 +0000 (17:46 +0200)
Lib/test/test_zipfile.py		patch \| blob \| blame \| history
Lib/zipfile.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst	[new file with mode: 0644]	patch \| blob