bpo-43780: Sync with importlib_metadata 3.10 (GH-25297)

author Jason R. Coombs <jaraco@jaraco.com>
Sat, 24 Apr 2021 14:13:51 +0000 (10:13 -0400)
committer GitHub <noreply@github.com>
Sat, 24 Apr 2021 14:13:51 +0000 (10:13 -0400)
diff --git a/Lib/importlib/_collections.py b/Lib/importlib/_collections.py

new file mode 100644 (file)

index 0000000..cf0954e
--- /dev/null
+++ b/Lib/importlib/_collections.py
@@ -0,0 +1,30 @@
+import collections
+
+
+# from jaraco.collections 3.3
+class FreezableDefaultDict(collections.defaultdict):
+    """
+    Often it is desirable to prevent the mutation of
+    a default dict after its initial construction, such
+    as to prevent mutation during iteration.
+
+    >>> dd = FreezableDefaultDict(list)
+    >>> dd[0].append('1')
+    >>> dd.freeze()
+    >>> dd[1]
+    []
+    >>> len(dd)
+    1
+    """
+
+    def __missing__(self, key):
+        return getattr(self, '_frozen', super().__missing__)(key)
+
+    def freeze(self):
+        self._frozen = lambda key: self.default_factory()
+
+
+class Pair(collections.namedtuple('Pair', 'name value')):
+    @classmethod
+    def parse(cls, text):
+        return cls(*map(str.strip, text.split("=", 1)))
diff --git a/Lib/importlib/_functools.py b/Lib/importlib/_functools.py

new file mode 100644 (file)

index 0000000..73f50d0
--- /dev/null
+++ b/Lib/importlib/_functools.py
@@ -0,0 +1,85 @@
+import types
+import functools
+
+
+# from jaraco.functools 3.3
+def method_cache(method, cache_wrapper=None):
+    """
+    Wrap lru_cache to support storing the cache data in the object instances.
+
+    Abstracts the common paradigm where the method explicitly saves an
+    underscore-prefixed protected property on first call and returns that
+    subsequently.
+
+    >>> class MyClass:
+    ...     calls = 0
+    ...
+    ...     @method_cache
+    ...     def method(self, value):
+    ...         self.calls += 1
+    ...         return value
+
+    >>> a = MyClass()
+    >>> a.method(3)
+    3
+    >>> for x in range(75):
+    ...     res = a.method(x)
+    >>> a.calls
+    75
+
+    Note that the apparent behavior will be exactly like that of lru_cache
+    except that the cache is stored on each instance, so values in one
+    instance will not flush values from another, and when an instance is
+    deleted, so are the cached values for that instance.
+
+    >>> b = MyClass()
+    >>> for x in range(35):
+    ...     res = b.method(x)
+    >>> b.calls
+    35
+    >>> a.method(0)
+    0
+    >>> a.calls
+    75
+
+    Note that if method had been decorated with ``functools.lru_cache()``,
+    a.calls would have been 76 (due to the cached value of 0 having been
+    flushed by the 'b' instance).
+
+    Clear the cache with ``.cache_clear()``
+
+    >>> a.method.cache_clear()
+
+    Same for a method that hasn't yet been called.
+
+    >>> c = MyClass()
+    >>> c.method.cache_clear()
+
+    Another cache wrapper may be supplied:
+
+    >>> cache = functools.lru_cache(maxsize=2)
+    >>> MyClass.method2 = method_cache(lambda self: 3, cache_wrapper=cache)
+    >>> a = MyClass()
+    >>> a.method2()
+    3
+
+    Caution - do not subsequently wrap the method with another decorator, such
+    as ``@property``, which changes the semantics of the function.
+
+    See also
+    http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/
+    for another implementation and additional justification.
+    """
+    cache_wrapper = cache_wrapper or functools.lru_cache()
+
+    def wrapper(self, *args, **kwargs):
+        # it's the first call, replace the method with a cached, bound method
+        bound_method = types.MethodType(method, self)
+        cached_method = cache_wrapper(bound_method)
+        setattr(self, method.__name__, cached_method)
+        return cached_method(*args, **kwargs)
+
+    # Support cache clear even before cache has been created.
+    wrapper.cache_clear = lambda: None
+
+    return wrapper
diff --git a/Lib/importlib/metadata.py b/Lib/importlib/metadata.py

index 53c1a145f5c43bfa727dd4be60e685b7111fa287..7a427eb3b2870a05dc88f84fe9feaf5f55f4f48d 100644 (file)
--- a/Lib/importlib/metadata.py
+++ b/Lib/importlib/metadata.py
@@ -7,15 +7,17 @@ import email
  import pathlib
  import zipfile
  import operator
+import textwrap
  import warnings
  import functools
  import itertools
  import posixpath
  import collections
  
+from ._collections import FreezableDefaultDict, Pair
+from ._functools import method_cache
  from ._itertools import unique_everseen
  
-from configparser import ConfigParser
  from contextlib import suppress
  from importlib import import_module
  from importlib.abc import MetaPathFinder
@@ -51,6 +53,71 @@ class PackageNotFoundError(ModuleNotFoundError):
          return name
  
  
+class Sectioned:
+    """
+    A simple entry point config parser for performance
+
+    >>> for item in Sectioned.read(Sectioned._sample):
+    ...     print(item)
+    Pair(name='sec1', value='# comments ignored')
+    Pair(name='sec1', value='a = 1')
+    Pair(name='sec1', value='b = 2')
+    Pair(name='sec2', value='a = 2')
+
+    >>> res = Sectioned.section_pairs(Sectioned._sample)
+    >>> item = next(res)
+    >>> item.name
+    'sec1'
+    >>> item.value
+    Pair(name='a', value='1')
+    >>> item = next(res)
+    >>> item.value
+    Pair(name='b', value='2')
+    >>> item = next(res)
+    >>> item.name
+    'sec2'
+    >>> item.value
+    Pair(name='a', value='2')
+    >>> list(res)
+    []
+    """
+
+    _sample = textwrap.dedent(
+        """
+        [sec1]
+        # comments ignored
+        a = 1
+        b = 2
+
+        [sec2]
+        a = 2
+        """
+    ).lstrip()
+
+    @classmethod
+    def section_pairs(cls, text):
+        return (
+            section._replace(value=Pair.parse(section.value))
+            for section in cls.read(text, filter_=cls.valid)
+            if section.name is not None
+        )
+
+    @staticmethod
+    def read(text, filter_=None):
+        lines = filter(filter_, map(str.strip, text.splitlines()))
+        name = None
+        for value in lines:
+            section_match = value.startswith('[') and value.endswith(']')
+            if section_match:
+                name = value.strip('[]')
+                continue
+            yield Pair(name, value)
+
+    @staticmethod
+    def valid(line):
+        return line and not line.startswith('#')
+
+
  class EntryPoint(
          collections.namedtuple('EntryPointBase', 'name value group')):
      """An entry point as defined by Python packaging conventions.
@@ -108,22 +175,6 @@ class EntryPoint(
          match = self.pattern.match(self.value)
          return list(re.finditer(r'\w+', match.group('extras') or ''))
  
-    @classmethod
-    def _from_config(cls, config):
-        return (
-            cls(name, value, group)
-            for group in config.sections()
-            for name, value in config.items(group)
-        )
-
-    @classmethod
-    def _from_text(cls, text):
-        config = ConfigParser(delimiters='=')
-        # case sensitive: https://stackoverflow.com/q/1611799/812183
-        config.optionxform = str
-        config.read_string(text)
-        return cls._from_config(config)
-
      def _for(self, dist):
          self.dist = dist
          return self
@@ -193,7 +244,18 @@ class EntryPoints(tuple):
  
      @classmethod
      def _from_text_for(cls, text, dist):
-        return cls(ep._for(dist) for ep in EntryPoint._from_text(text))
+        return cls(ep._for(dist) for ep in cls._from_text(text))
+
+    @classmethod
+    def _from_text(cls, text):
+        return itertools.starmap(EntryPoint, cls._parse_groups(text or ''))
+
+    @staticmethod
+    def _parse_groups(text):
+        return (
+            (item.value.name, item.value.value, item.name)
+            for item in Sectioned.section_pairs(text)
+        )
  
  
  def flake8_bypass(func):
@@ -259,7 +321,7 @@ class Deprecated:
          return super().values()
  
  
-class SelectableGroups(dict):
+class SelectableGroups(Deprecated, dict):
      """
      A backward- and forward-compatible result from
      entry_points that fully implements the dict interface.
@@ -277,7 +339,8 @@ class SelectableGroups(dict):
          """
          Reconstruct a list of all entrypoints from the groups.
          """
-        return EntryPoints(itertools.chain.from_iterable(self.values()))
+        groups = super(Deprecated, self).values()
+        return EntryPoints(itertools.chain.from_iterable(groups))
  
      @property
      def groups(self):
@@ -507,24 +570,7 @@ class Distribution:
  
      @classmethod
      def _deps_from_requires_text(cls, source):
-        section_pairs = cls._read_sections(source.splitlines())
-        sections = {
-            section: list(map(operator.itemgetter('line'), results))
-            for section, results in itertools.groupby(
-                section_pairs, operator.itemgetter('section')
-            )
-        }
-        return cls._convert_egg_info_reqs_to_simple_reqs(sections)
-
-    @staticmethod
-    def _read_sections(lines):
-        section = None
-        for line in filter(None, lines):
-            section_match = re.match(r'\[(.*)\]$', line)
-            if section_match:
-                section = section_match.group(1)
-                continue
-            yield locals()
+        return cls._convert_egg_info_reqs_to_simple_reqs(Sectioned.read(source))
  
      @staticmethod
      def _convert_egg_info_reqs_to_simple_reqs(sections):
@@ -549,9 +595,8 @@ class Distribution:
              conditions = list(filter(None, [markers, make_condition(extra)]))
              return '; ' + ' and '.join(conditions) if conditions else ''
  
-        for section, deps in sections.items():
-            for dep in deps:
-                yield dep + parse_condition(section)
+        for section in sections:
+            yield section.value + parse_condition(section.name)
  
  
  class DistributionFinder(MetaPathFinder):
@@ -607,6 +652,10 @@ class FastPath:
      children.
      """
  
+    @functools.lru_cache()  # type: ignore
+    def __new__(cls, root):
+        return super().__new__(cls)
+
      def __init__(self, root):
          self.root = root
          self.base = os.path.basename(self.root).lower()
@@ -629,11 +678,53 @@ class FastPath:
          return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names)
  
      def search(self, name):
-        return (
-            self.joinpath(child)
-            for child in self.children()
-            if name.matches(child, self.base)
+        return self.lookup(self.mtime).search(name)
+
+    @property
+    def mtime(self):
+        with suppress(OSError):
+            return os.stat(self.root).st_mtime
+        self.lookup.cache_clear()
+
+    @method_cache
+    def lookup(self, mtime):
+        return Lookup(self)
+
+
+class Lookup:
+    def __init__(self, path: FastPath):
+        base = os.path.basename(path.root).lower()
+        base_is_egg = base.endswith(".egg")
+        self.infos = FreezableDefaultDict(list)
+        self.eggs = FreezableDefaultDict(list)
+
+        for child in path.children():
+            low = child.lower()
+            if low.endswith((".dist-info", ".egg-info")):
+                # rpartition is faster than splitext and suitable for this purpose.
+                name = low.rpartition(".")[0].partition("-")[0]
+                normalized = Prepared.normalize(name)
+                self.infos[normalized].append(path.joinpath(child))
+            elif base_is_egg and low == "egg-info":
+                name = base.rpartition(".")[0].partition("-")[0]
+                legacy_normalized = Prepared.legacy_normalize(name)
+                self.eggs[legacy_normalized].append(path.joinpath(child))
+
+        self.infos.freeze()
+        self.eggs.freeze()
+
+    def search(self, prepared):
+        infos = (
+            self.infos[prepared.normalized]
+            if prepared
+            else itertools.chain.from_iterable(self.infos.values())
+        )
+        eggs = (
+            self.eggs[prepared.legacy_normalized]
+            if prepared
+            else itertools.chain.from_iterable(self.eggs.values())
          )
+        return itertools.chain(infos, eggs)
  
  
  class Prepared:
@@ -642,22 +733,14 @@ class Prepared:
      """
  
      normalized = None
-    suffixes = 'dist-info', 'egg-info'
-    exact_matches = [''][:0]
-    egg_prefix = ''
-    versionless_egg_name = ''
+    legacy_normalized = None
  
      def __init__(self, name):
          self.name = name
          if name is None:
              return
          self.normalized = self.normalize(name)
-        self.exact_matches = [
-            self.normalized + '.' + suffix for suffix in self.suffixes
-        ]
-        legacy_normalized = self.legacy_normalize(self.name)
-        self.egg_prefix = legacy_normalized + '-'
-        self.versionless_egg_name = legacy_normalized + '.egg'
+        self.legacy_normalized = self.legacy_normalize(name)
  
      @staticmethod
      def normalize(name):
@@ -674,26 +757,8 @@ class Prepared:
          """
          return name.lower().replace('-', '_')
  
-    def matches(self, cand, base):
-        low = cand.lower()
-        # rpartition is faster than splitext and suitable for this purpose.
-        pre, _, ext = low.rpartition('.')
-        name, _, rest = pre.partition('-')
-        return (
-            low in self.exact_matches
-            or ext in self.suffixes
-            and (not self.normalized or name.replace('.', '_') == self.normalized)
-            # legacy case:
-            or self.is_egg(base)
-            and low == 'egg-info'
-        )
-
-    def is_egg(self, base):
-        return (
-            base == self.versionless_egg_name
-            or base.startswith(self.egg_prefix)
-            and base.endswith('.egg')
-        )
+    def __bool__(self):
+        return bool(self.name)
  
  
  class MetadataPathFinder(DistributionFinder):
@@ -718,6 +783,9 @@ class MetadataPathFinder(DistributionFinder):
              path.search(prepared) for path in map(FastPath, paths)
          )
  
+    def invalidate_caches(cls):
+        FastPath.__new__.cache_clear()
+
  
  class PathDistribution(Distribution):
      def __init__(self, path):
diff --git a/Lib/test/test_importlib/fixtures.py b/Lib/test/test_importlib/fixtures.py

index b50afda0f8f8f79c006dcd178ebf70d8bd3bded7..1ae70c70f10a554e2ac9663dea783a954eca253a 100644 (file)
--- a/Lib/test/test_importlib/fixtures.py
+++ b/Lib/test/test_importlib/fixtures.py
@@ -86,6 +86,10 @@ class DistInfoPkg(OnSysPath, SiteDir):
                  Version: 1.0.0
                  Requires-Dist: wheel >= 1.0
                  Requires-Dist: pytest; extra == 'test'
+                Keywords: sample package
+
+                Once upon a time
+                There was a distinfo pkg
                  """,
              "RECORD": "mod.py,sha256=abc,20\n",
              "entry_points.txt": """
@@ -157,6 +161,9 @@ class EggInfoPkg(OnSysPath, SiteDir):
                  Version: 1.0.0
                  Classifier: Intended Audience :: Developers
                  Classifier: Topic :: Software Development :: Libraries
+                Keywords: sample package
+                Description: Once upon a time
+                        There was an egginfo package
                  """,
              "SOURCES.txt": """
                  mod.py
diff --git a/Lib/test/test_importlib/test_metadata_api.py b/Lib/test/test_importlib/test_metadata_api.py

index b54c3bd098d4362468117397862f2e26c72456e3..657c16603f668a6d7c273cd64295c7707ed0f72f 100644 (file)
--- a/Lib/test/test_importlib/test_metadata_api.py
+++ b/Lib/test/test_importlib/test_metadata_api.py
@@ -2,6 +2,7 @@ import re
  import textwrap
  import unittest
  import warnings
+import importlib
  
  from . import fixtures
  from importlib.metadata import (
@@ -260,3 +261,9 @@ class OffSysPathTests(fixtures.DistInfoPkgOffPath, unittest.TestCase):
          dist_info_path = self.site_dir / 'distinfo_pkg-1.0.0.dist-info'
          dist = Distribution.at(str(dist_info_path))
          assert dist.version == '1.0.0'
+
+
+class InvalidateCache(unittest.TestCase):
+    def test_invalidate_cache(self):
+        # No externally observable behavior, but ensures test coverage...
+        importlib.invalidate_caches()
diff --git a/Misc/NEWS.d/next/Library/2021-04-08-20-04-46.bpo-43780.hUOgCh.rst b/Misc/NEWS.d/next/Library/2021-04-08-20-04-46.bpo-43780.hUOgCh.rst

new file mode 100644 (file)

index 0000000..3adbe50
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-04-08-20-04-46.bpo-43780.hUOgCh.rst
@@ -0,0 +1,3 @@
+In ``importlib.metadata``, incorporate changes from importlib_metadata 3.10:
+Add mtime-based caching during distribution discovery. Flagged use of dict
+result from ``entry_points()`` as deprecated.
author	Jason R. Coombs <jaraco@jaraco.com>
	Sat, 24 Apr 2021 14:13:51 +0000 (10:13 -0400)
committer	GitHub <noreply@github.com>
	Sat, 24 Apr 2021 14:13:51 +0000 (10:13 -0400)
Lib/importlib/_collections.py	[new file with mode: 0644]	patch | blob
Lib/importlib/_functools.py	[new file with mode: 0644]	patch | blob
Lib/importlib/metadata.py		patch | blob | blame | history
Lib/test/test_importlib/fixtures.py		patch | blob | blame | history
Lib/test/test_importlib/test_metadata_api.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2021-04-08-20-04-46.bpo-43780.hUOgCh.rst	[new file with mode: 0644]	patch | blob