From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Wed, 15 Apr 2020 18:45:25 +0000 (-0700) Subject: [3.8] bpo-39667: Sync zipp 3.0 (GH-18540) (GH-18701) X-Git-Tag: v3.8.3rc1~30 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3e72de9e08b03a15875f5b226c5f096e567dab42;p=thirdparty%2FPython%2Fcpython.git [3.8] bpo-39667: Sync zipp 3.0 (GH-18540) (GH-18701) * bpo-39667: Sync zipp 3.0 (GH-18540) * bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0 * 📜🤖 Added by blurb_it. * Update docs for new zipfile.Path.open * Rely on dict, faster than OrderedDict. * Syntax edits on docs Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> (cherry picked from commit 0aeab5c4381f0cc11479362af2533b3a391312ac) Co-authored-by: Jason R. Coombs * Clarify the change in behavior with a couple of workaround options. * Restore API compatibility while backporting performance improvements. Co-authored-by: Jason R. Coombs --- diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index e8a2530fb8c1..97da6cab806e 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -494,6 +494,12 @@ Path objects are traversable using the ``/`` operator. Invoke :meth:`ZipFile.open` on the current path. Accepts the same arguments as :meth:`ZipFile.open`. + .. caution:: + + The signature of this method changes in an incompatible way + in Python 3.9. For a future-compatible version, consider using + ``Path`` from the third-party ``zipp`` package (3.0 or later). + .. method:: Path.iterdir() Enumerate the children of the current directory. 
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 61bca8651c02..28e62dc5c61c 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -5,6 +5,7 @@ import itertools import os import pathlib import posixpath +import string import struct import subprocess import sys @@ -2933,6 +2934,11 @@ class TestPath(unittest.TestCase): # Check the file iterated all items assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES + # @func_timeout.func_set_timeout(3) + def test_implied_dirs_performance(self): + data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)] + zipfile.CompleteDirs._implied_dirs(data) + if __name__ == "__main__": unittest.main() diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 5dc6516cc47b..07faaccac922 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -17,7 +17,6 @@ import sys import threading import time import contextlib -from collections import OrderedDict try: import zlib # We may need its compression method @@ -2125,24 +2124,6 @@ class PyZipFile(ZipFile): return (fname, archivename) -def _unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." - # unique_everseen('AAAABBBCCDAABBB') --> A B C D - # unique_everseen('ABBCcAD', str.lower) --> A B C D - seen = set() - seen_add = seen.add - if key is None: - for element in itertools.filterfalse(seen.__contains__, iterable): - seen_add(element) - yield element - else: - for element in iterable: - k = key(element) - if k not in seen: - seen_add(k) - yield element - - def _parents(path): """ Given a path with elements separated by @@ -2184,6 +2165,18 @@ def _ancestry(path): path, tail = posixpath.split(path) +_dedupe = dict.fromkeys +"""Deduplicate an iterable in original order""" + + +def _difference(minuend, subtrahend): + """ + Return items in minuend not in subtrahend, retaining order + with O(1) lookup. 
+ """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + + class CompleteDirs(ZipFile): """ A ZipFile subclass that ensures that implied directories @@ -2193,13 +2186,8 @@ class CompleteDirs(ZipFile): @staticmethod def _implied_dirs(names): parents = itertools.chain.from_iterable(map(_parents, names)) - # Deduplicate entries in original order - implied_dirs = OrderedDict.fromkeys( - p + posixpath.sep for p in parents - # Cast names to a set for O(1) lookups - if p + posixpath.sep not in set(names) - ) - return implied_dirs + as_dirs = (p + posixpath.sep for p in parents) + return _dedupe(_difference(as_dirs, names)) def namelist(self): names = super(CompleteDirs, self).namelist() diff --git a/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst b/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst new file mode 100644 index 000000000000..ccc33e289846 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-17-22-38-15.bpo-39667.QuzEHH.rst @@ -0,0 +1 @@ +Correct performance degradation in ``zipfile.Path`` as found in zipp 3.0. While retaining compatibility, this change discourages the use of ``zipfile.Path.open`` due to the signature change in Python 3.9. For compatibility across Python 3.8 and later versions, consider using ``zipp.Path`` on Python 3.8.x and earlier.