gh-118761: Improve the import time of ``gettext`` (#128898)

author Eli Schwartz <eschwartz@gentoo.org>

Mon, 20 Jan 2025 00:01:20 +0000 (19:01 -0500)

committer GitHub <noreply@github.com>

Mon, 20 Jan 2025 00:01:20 +0000 (00:01 +0000)
author Eli Schwartz <eschwartz@gentoo.org>
Mon, 20 Jan 2025 00:01:20 +0000 (19:01 -0500)
committer GitHub <noreply@github.com>
Mon, 20 Jan 2025 00:01:20 +0000 (00:01 +0000)
diff --git a/Lib/gettext.py b/Lib/gettext.py

index a0d81cf846a05c2bd1ec9e4d0b110d27c7785392..4c1f9427459b14c8192180e6d637c60fc3cd2a0c 100644 (file)
--- a/Lib/gettext.py
+++ b/Lib/gettext.py
@@ -48,7 +48,6 @@ internationalized, to the local language and cultural habits.
  
  import operator
  import os
-import re
  import sys
  
  
@@ -70,22 +69,26 @@ _default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
  # https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms
  # http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y
  
-_token_pattern = re.compile(r"""
-        (?P<WHITESPACES>[ \t]+)                    | # spaces and horizontal tabs
-        (?P<NUMBER>[0-9]+\b)                       | # decimal integer
-        (?P<NAME>n\b)                              | # only n is allowed
-        (?P<PARENTHESIS>[()])                      |
-        (?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
-                                                     # <=, >=, ==, !=, &&, ||,
-                                                     # ? :
-                                                     # unary and bitwise ops
-                                                     # not allowed
-        (?P<INVALID>\w+|.)                           # invalid token
-    """, re.VERBOSE|re.DOTALL)
-
+_token_pattern = None
  
  def _tokenize(plural):
-    for mo in re.finditer(_token_pattern, plural):
+    global _token_pattern
+    if _token_pattern is None:
+        import re
+        _token_pattern = re.compile(r"""
+                (?P<WHITESPACES>[ \t]+)                    | # spaces and horizontal tabs
+                (?P<NUMBER>[0-9]+\b)                       | # decimal integer
+                (?P<NAME>n\b)                              | # only n is allowed
+                (?P<PARENTHESIS>[()])                      |
+                (?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
+                                                             # <=, >=, ==, !=, &&, ||,
+                                                             # ? :
+                                                             # unary and bitwise ops
+                                                             # not allowed
+                (?P<INVALID>\w+|.)                           # invalid token
+            """, re.VERBOSE|re.DOTALL)
+
+    for mo in _token_pattern.finditer(plural):
          kind = mo.lastgroup
          if kind == 'WHITESPACES':
              continue
diff --git a/Misc/NEWS.d/next/Library/2025-01-15-19-16-50.gh-issue-118761.cbW2ZL.rst b/Misc/NEWS.d/next/Library/2025-01-15-19-16-50.gh-issue-118761.cbW2ZL.rst

new file mode 100644 (file)

index 0000000..0eef877
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-01-15-19-16-50.gh-issue-118761.cbW2ZL.rst
@@ -0,0 +1,3 @@
+Reduce the import time of :mod:`gettext` by up to ten times by importing
+:mod:`re` on demand. As a consequence, ``re`` is no longer implicitly
+exposed as ``gettext.re``. Patch by Eli Schwartz.
author	Eli Schwartz <eschwartz@gentoo.org>
	Mon, 20 Jan 2025 00:01:20 +0000 (19:01 -0500)
committer	GitHub <noreply@github.com>
	Mon, 20 Jan 2025 00:01:20 +0000 (00:01 +0000)
Lib/gettext.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2025-01-15-19-16-50.gh-issue-118761.cbW2ZL.rst	[new file with mode: 0644]	patch | blob