From: Sergey Fedoseev Date: Tue, 11 Sep 2018 22:47:59 +0000 (+0500) Subject: bpo-34636: Use fast path for more chars in SRE category macros. (GH-9170) X-Git-Tag: v3.8.0a1~1043 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ec014a101a7f6243b95dfc08acfe1542b9fa5d39;p=thirdparty%2FPython%2Fcpython.git bpo-34636: Use fast path for more chars in SRE category macros. (GH-9170) When handling \s, \d, or \w (and their inverse) escapes in bytes regexes, this is a small but measurable performance improvement. https://bugs.python.org/issue34636 --- diff --git a/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst new file mode 100644 index 000000000000..c982b0a4cda0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-09-11-15-04-05.bpo-34636.capCmt.rst @@ -0,0 +1,2 @@ +Speed up re scanning of many non-matching characters for \s \w and \d within +bytes objects. (microoptimization) diff --git a/Modules/_sre.c b/Modules/_sre.c index d67083037e51..483cf5e9ff9c 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -87,13 +87,13 @@ static const char copyright[] = /* search engine state */ #define SRE_IS_DIGIT(ch)\ - ((ch) < 128 && Py_ISDIGIT(ch)) + ((ch) <= '9' && Py_ISDIGIT(ch)) #define SRE_IS_SPACE(ch)\ - ((ch) < 128 && Py_ISSPACE(ch)) + ((ch) <= ' ' && Py_ISSPACE(ch)) #define SRE_IS_LINEBREAK(ch)\ ((ch) == '\n') #define SRE_IS_WORD(ch)\ - ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_')) + ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) static unsigned int sre_lower_ascii(unsigned int ch) {