git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-143658: importlib.metadata: Use `str.translate` to improve performance of `importlib.metadata.Prepared.normalize`
author: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Tue, 13 Jan 2026 06:54:15 +0000 (08:54 +0200)
committer: GitHub <noreply@github.com>
Tue, 13 Jan 2026 06:54:15 +0000 (08:54 +0200)
Co-authored-by: Henry Schreiner <henryschreineriii@gmail.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Bartosz Sławecki <bartosz@ilikepython.com>
Lib/importlib/metadata/__init__.py
Lib/test/test_importlib/metadata/test_api.py
Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst [new file with mode: 0644]

index b010bb8525e5ccf6ecea4fc75044a97a514b0471..9b723b4ec15e12f5dd98f06248f91ce03e051fd1 100644 (file)
@@ -890,6 +890,14 @@ class Lookup:
         return itertools.chain(infos, eggs)
 
 
+# Translation table for Prepared.normalize: lowercase ASCII letters and
+# replace "-" (hyphen) and "." (dot) with "_" (underscore).
+_normalize_table = str.maketrans(
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ-.",
+    "abcdefghijklmnopqrstuvwxyz__",
+)
+
+
 class Prepared:
     """
     A prepared search query for metadata on a possibly-named package.
@@ -925,7 +933,13 @@ class Prepared:
         """
         PEP 503 normalization plus dashes as underscores.
         """
-        return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_')
+        # Emulates ``re.sub(r"[-_.]+", "-", name).lower().replace('-', '_')``:
+        # PEP 503 plus underscores. About 3x faster; safe since names are ASCII-only.
+        value = name.translate(_normalize_table)
+        # Condense repeats (faster than regex)
+        while "__" in value:
+            value = value.replace("__", "_")
+        return value
 
     @staticmethod
     def legacy_normalize(name):
index 9f6e12c87e859cbc87d9fd5a19e332566a5bc68d..3c856a88b77bf64c6b99752a2c523103942c56a6 100644 (file)
@@ -6,6 +6,7 @@ import unittest
 from importlib.metadata import (
     Distribution,
     PackageNotFoundError,
+    Prepared,
     distribution,
     entry_points,
     files,
@@ -313,3 +314,36 @@ class InvalidateCache(unittest.TestCase):
     def test_invalidate_cache(self):
         # No externally observable behavior, but ensures test coverage...
         importlib.invalidate_caches()
+
+
+class PreparedTests(unittest.TestCase):
+    def test_normalize(self):
+        tests = [
+            # Simple
+            ("sample", "sample"),
+            # Mixed case
+            ("Sample", "sample"),
+            ("SAMPLE", "sample"),
+            ("SaMpLe", "sample"),
+            # Separator conversions
+            ("sample-pkg", "sample_pkg"),
+            ("sample.pkg", "sample_pkg"),
+            ("sample_pkg", "sample_pkg"),
+            # Multiple separators
+            ("sample---pkg", "sample_pkg"),
+            ("sample___pkg", "sample_pkg"),
+            ("sample...pkg", "sample_pkg"),
+            # Mixed separators
+            ("sample-._pkg", "sample_pkg"),
+            ("sample_.-pkg", "sample_pkg"),
+            # Complex
+            ("Sample__Pkg-name.foo", "sample_pkg_name_foo"),
+            ("Sample__Pkg.name__foo", "sample_pkg_name_foo"),
+            # Uppercase with separators
+            ("SAMPLE-PKG", "sample_pkg"),
+            ("Sample.Pkg", "sample_pkg"),
+            ("SAMPLE_PKG", "sample_pkg"),
+        ]
+        for name, expected in tests:
+            with self.subTest(name=name):
+                self.assertEqual(Prepared.normalize(name), expected)
diff --git a/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst b/Misc/NEWS.d/next/Library/2026-01-10-15-40-57.gh-issue-143658.Ox6pE5.rst
new file mode 100644 (file)
index 0000000..1d22709
--- /dev/null
@@ -0,0 +1,3 @@
+:mod:`importlib.metadata`: Use :meth:`str.translate` to improve performance of
+:meth:`!importlib.metadata.Prepared.normalize`. Patch by Hugo van Kemenade and
+Henry Schreiner.