git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-118761: Optimise import time for ``shlex`` (#132036)
author Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Thu, 24 Apr 2025 15:10:46 +0000 (16:10 +0100)
committer GitHub <noreply@github.com>
Thu, 24 Apr 2025 15:10:46 +0000 (16:10 +0100)
Lib/shlex.py
Lib/test/test_shlex.py
Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst [new file with mode: 0644]

diff --git a/Lib/shlex.py b/Lib/shlex.py
index f4821616b62a0fa82ef0354d4901761ab20f74cb..5bf6e0d70e001210c5934c82383fd00bba36ca4d 100644 (file)
@@ -7,11 +7,7 @@
 # iterator interface by Gustavo Niemeyer, April 2003.
 # changes to tokenize more like Posix shells by Vinay Sajip, July 2016.
 
-import os
-import re
 import sys
-from collections import deque
-
 from io import StringIO
 
 __all__ = ["shlex", "split", "quote", "join"]
@@ -20,6 +16,8 @@ class shlex:
     "A lexical analyzer class for simple shell-like syntaxes."
     def __init__(self, instream=None, infile=None, posix=False,
                  punctuation_chars=False):
+        from collections import deque  # deferred import for performance
+
         if isinstance(instream, str):
             instream = StringIO(instream)
         if instream is not None:
@@ -278,6 +276,7 @@ class shlex:
 
     def sourcehook(self, newfile):
         "Hook called on a filename to be sourced."
+        import os.path
         if newfile[0] == '"':
             newfile = newfile[1:-1]
         # This implements cpp-like semantics for relative-path inclusion.
@@ -318,13 +317,17 @@ def join(split_command):
     return ' '.join(quote(arg) for arg in split_command)
 
 
-_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search
-
 def quote(s):
     """Return a shell-escaped version of the string *s*."""
     if not s:
         return "''"
-    if _find_unsafe(s) is None:
+
+    # Use bytes.translate() for performance
+    safe_chars = (b'%+,-./0123456789:=@'
+                  b'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'
+                  b'abcdefghijklmnopqrstuvwxyz')
+    # No quoting is needed if `s` is an ASCII string consisting only of `safe_chars`
+    if s.isascii() and not s.encode().translate(None, delete=safe_chars):
         return s
 
     # use single quotes, and put single quotes into double quotes
diff --git a/Lib/test/test_shlex.py b/Lib/test/test_shlex.py
index 797c91ee7effdfda9a694a6616257ce52a7a4f03..f35571ea88654ddcc760f8c4dbc036f458a5d484 100644 (file)
@@ -3,6 +3,7 @@ import itertools
 import shlex
 import string
 import unittest
+from test.support import import_helper
 
 
 # The original test data set was from shellwords, by Hartmut Goebel.
@@ -363,6 +364,9 @@ class ShlexTest(unittest.TestCase):
         with self.assertRaises(AttributeError):
             shlex_instance.punctuation_chars = False
 
+    def test_lazy_imports(self):
+        import_helper.ensure_lazy_imports('shlex', {'collections', 're', 'os'})
+
 
 # Allow this test to be used with old shlex.py
 if not getattr(shlex, "split", None):
diff --git a/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst
new file mode 100644 (file)
index 0000000..6b4b3ed
--- /dev/null
@@ -0,0 +1,3 @@
+Improve import times by up to 33x for the :mod:`shlex` module,
+and improve the performance of :func:`shlex.quote` by up to 12x.
+Patch by Adam Turner.