From cbf9b8cc08364cdcf355fe1c897f698331b49a41 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 13 Jan 2026 08:54:15 +0200 Subject: [PATCH] gh-143658: importlib.metadata: Use `str.translate` to improve performance of `importlib.metadata.Prepared.normalized` (#143660) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Schreiner Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Bartosz Sławecki --- Lib/importlib/metadata/__init__.py | 16 ++++++++- Lib/test/test_importlib/metadata/test_api.py | 34 +++++++++++++++++++ ...-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst | 3 ++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index b010bb8525e5..9b723b4ec15e 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -890,6 +890,14 @@ class Lookup: return itertools.chain(infos, eggs) +# Translation table for Prepared.normalize: lowercase and +# replace "-" (hyphen) and "." (dot) with "_" (underscore). +_normalize_table = str.maketrans( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.", + "abcdefghijklmnopqrstuvwxyz__", +) + + class Prepared: """ A prepared search query for metadata on a possibly-named package. @@ -925,7 +933,13 @@ class Prepared: """ PEP 503 normalization plus dashes as underscores. """ - return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_') + # Emulates ``re.sub(r"[-_.]+", "-", name).lower()`` from PEP 503 + # About 3x faster, safe since packages only support alphanumeric characters + value = name.translate(_normalize_table) + # Condense repeats (faster than regex) + while "__" in value: + value = value.replace("__", "_") + return value @staticmethod def legacy_normalize(name): diff --git a/Lib/test/test_importlib/metadata/test_api.py b/Lib/test/test_importlib/metadata/test_api.py index 9f6e12c87e85..3c856a88b77b 100644 --- a/Lib/test/test_importlib/metadata/test_api.py +++ b/Lib/test/test_importlib/metadata/test_api.py @@ -6,6 +6,7 @@ import unittest from importlib.metadata import ( Distribution, PackageNotFoundError, + Prepared, distribution, entry_points, files, @@ -313,3 +314,36 @@ class InvalidateCache(unittest.TestCase): def test_invalidate_cache(self): # No externally observable behavior, but ensures test coverage... importlib.invalidate_caches() + + +class PreparedTests(unittest.TestCase): + def test_normalize(self): + tests = [ + # Simple + ("sample", "sample"), + # Mixed case + ("Sample", "sample"), + ("SAMPLE", "sample"), + ("SaMpLe", "sample"), + # Separator conversions + ("sample-pkg", "sample_pkg"), + ("sample.pkg", "sample_pkg"), + ("sample_pkg", "sample_pkg"), + # Multiple separators + ("sample---pkg", "sample_pkg"), + ("sample___pkg", "sample_pkg"), + ("sample...pkg", "sample_pkg"), + # Mixed separators + ("sample-._pkg", "sample_pkg"), + ("sample_.-pkg", "sample_pkg"), + # Complex + ("Sample__Pkg-name.foo", "sample_pkg_name_foo"), + ("Sample__Pkg.name__foo", "sample_pkg_name_foo"), + # Uppercase with separators + ("SAMPLE-PKG", "sample_pkg"), + ("Sample.Pkg", "sample_pkg"), + ("SAMPLE_PKG", "sample_pkg"), + ] + for name, expected in tests: + with self.subTest(name=name): + self.assertEqual(Prepared.normalize(name), expected) diff --git a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst new file mode 100644 index 000000000000..1d2270957264 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst @@ -0,0 +1,3 @@ +:mod:`importlib.metadata`: Use :meth:`str.translate` to improve performance of +:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade and +Henry Schreiner. -- 2.47.3