git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-44987: Speed up unicode normalization of ASCII strings (GH-28283)
author: Dong-hee Na <donghee.na@python.org>
Sat, 11 Sep 2021 15:04:38 +0000 (15:04 +0000)
committer: GitHub <noreply@github.com>
Sat, 11 Sep 2021 15:04:38 +0000 (18:04 +0300)
Doc/whatsnew/3.11.rst
Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst [new file with mode: 0644]
Modules/unicodedata.c

index 9befe8f2732e7047bb02d57114a4e23bb0bad6b0..254d7224a7a502218ed8cbc93d5e8060930c332b 100644 (file)
@@ -287,6 +287,9 @@ Optimizations
 
 * :file:`.pdbrc` is now read with ``'utf-8'`` encoding.
 
+* Pure ASCII strings are now normalized in constant time by :func:`unicodedata.normalize`.
+  (Contributed by Dong-hee Na in :issue:`44987`.)
+
 
 CPython bytecode changes
 ========================
diff --git a/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst b/Misc/NEWS.d/next/Library/2021-09-11-14-41-02.bpo-44987.Mt8DiX.rst
new file mode 100644 (file)
index 0000000..dec50d8
--- /dev/null
@@ -0,0 +1,2 @@
+Pure ASCII strings are now normalized in constant time by :func:`unicodedata.normalize`.
+Patch by Dong-hee Na.
index b4563f331d5a8677b3013f6ca583483a8b676f52..97585725c0b6ecf55dbf74f148308138a1e176fc 100644 (file)
@@ -807,6 +807,10 @@ is_normalized_quickcheck(PyObject *self, PyObject *input, bool nfc, bool k,
         return NO;
     }
 
+    if (PyUnicode_IS_ASCII(input)) {
+        return YES;
+    }
+
     Py_ssize_t i, len;
     int kind;
     const void *data;