From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 13 Jan 2026 06:54:15 +0000 (+0200) Subject: gh-143658: importlib.metadata: Use `str.translate` to improve performance of `importl... X-Git-Tag: v3.15.0a5~11^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cbf9b8cc08364cdcf355fe1c897f698331b49a41;p=thirdparty%2FPython%2Fcpython.git gh-143658: importlib.metadata: Use `str.translate` to improve performance of `importlib.metadata.Prepared.normalized` (#143660) Co-authored-by: Henry Schreiner Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Bartosz Sławecki --- diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index b010bb8525e5..9b723b4ec15e 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -890,6 +890,14 @@ class Lookup: return itertools.chain(infos, eggs) +# Translation table for Prepared.normalize: lowercase and +# replace "-" (hyphen) and "." (dot) with "_" (underscore). +_normalize_table = str.maketrans( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.", + "abcdefghijklmnopqrstuvwxyz__", +) + + class Prepared: """ A prepared search query for metadata on a possibly-named package. @@ -925,7 +933,13 @@ class Prepared: """ PEP 503 normalization plus dashes as underscores. """ - return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + # Emulates ``re.sub(r"[-_.]+", "-", name).lower()`` from PEP 503 + # About 3x faster, safe since packages only support alphanumeric characters + value = name.translate(_normalize_table) + # Condense repeats (faster than regex) + while "__" in value: + value = value.replace("__", "_") + return value @staticmethod def legacy_normalize(name): diff --git a/Lib/test/test_importlib/metadata/test_api.py b/Lib/test/test_importlib/metadata/test_api.py index 9f6e12c87e85..3c856a88b77b 100644 --- a/Lib/test/test_importlib/metadata/test_api.py +++ b/Lib/test/test_importlib/metadata/test_api.py @@ -6,6 +6,7 @@ import unittest from importlib.metadata import ( Distribution, PackageNotFoundError, + Prepared, distribution, entry_points, files, @@ -313,3 +314,36 @@ class InvalidateCache(unittest.TestCase): def test_invalidate_cache(self): # No externally observable behavior, but ensures test coverage... importlib.invalidate_caches() + + +class PreparedTests(unittest.TestCase): + def test_normalize(self): + tests = [ + # Simple + ("sample", "sample"), + # Mixed case + ("Sample", "sample"), + ("SAMPLE", "sample"), + ("SaMpLe", "sample"), + # Separator conversions + ("sample-pkg", "sample_pkg"), + ("sample.pkg", "sample_pkg"), + ("sample_pkg", "sample_pkg"), + # Multiple separators + ("sample---pkg", "sample_pkg"), + ("sample___pkg", "sample_pkg"), + ("sample...pkg", "sample_pkg"), + # Mixed separators + ("sample-._pkg", "sample_pkg"), + ("sample_.-pkg", "sample_pkg"), + # Complex + ("Sample__Pkg-name.foo", "sample_pkg_name_foo"), + ("Sample__Pkg.name__foo", "sample_pkg_name_foo"), + # Uppercase with separators + ("SAMPLE-PKG", "sample_pkg"), + ("Sample.Pkg", "sample_pkg"), + ("SAMPLE_PKG", "sample_pkg"), + ] + for name, expected in tests: + with self.subTest(name=name): + self.assertEqual(Prepared.normalize(name), expected) diff --git a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst new file mode 100644 index 000000000000..1d2270957264 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst @@ -0,0 +1,3 @@ +:mod:`importlib.metadata`: Use :meth:`str.translate` to improve performance of +:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade and +Henry Schreiner.