git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-34636: Use fast path for more chars in SRE category macros. (GH-9170)
author: Sergey Fedoseev <fedoseev.sergey@gmail.com>
Tue, 11 Sep 2018 22:47:59 +0000 (03:47 +0500)
committer: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 11 Sep 2018 22:47:59 +0000 (15:47 -0700)
When handling \s, \d, or \w escapes (and their inverses) in bytes regexes, this is a small but measurable performance improvement.

<!-- issue-number: [bpo-34636](https://www.bugs.python.org/issue34636) -->
https://bugs.python.org/issue34636
<!-- /issue-number -->

Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst [new file with mode: 0644]
Modules/_sre.c

diff --git a/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst
new file mode 100644 (file)
index 0000000..c982b0a
--- /dev/null
@@ -0,0 +1,2 @@
+Speed up re scanning of many non-matching characters for \s \w and \d within
+bytes objects. (microoptimization)
index d67083037e51b85ff5f6cae71b4538449af996ee..483cf5e9ff9ca19bcee149a05ecaa18917e12f6f 100644 (file)
@@ -87,13 +87,13 @@ static const char copyright[] =
 /* search engine state */
 
 #define SRE_IS_DIGIT(ch)\
-    ((ch) < 128 && Py_ISDIGIT(ch))
+    ((ch) <= '9' && Py_ISDIGIT(ch))
 #define SRE_IS_SPACE(ch)\
-    ((ch) < 128 && Py_ISSPACE(ch))
+    ((ch) <= ' ' && Py_ISSPACE(ch))
 #define SRE_IS_LINEBREAK(ch)\
     ((ch) == '\n')
 #define SRE_IS_WORD(ch)\
-    ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
+    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_'))
 
 static unsigned int sre_lower_ascii(unsigned int ch)
 {