From: Ruben Vorderman Date: Thu, 13 Jun 2024 14:28:59 +0000 (+0200) Subject: gh-120397: Optimize str.count() for single characters (#120398) X-Git-Tag: v3.14.0a1~1506 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=2078eb45ca0db495972a20fcaf96df8fcf48451d;p=thirdparty%2FPython%2Fcpython.git gh-120397: Optimize str.count() for single characters (#120398) --- diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst new file mode 100644 index 000000000000..05c55e8a45eb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-12-13-47-25.gh-issue-120397.n-I_cc.rst @@ -0,0 +1,2 @@ +Improve the throughput by up to two times for the :meth:`str.count`, :meth:`bytes.count` and :meth:`bytearray.count` +methods for counting single characters. diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 309ed1554f46..05e700b06258 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -753,6 +753,22 @@ STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n, } +static inline Py_ssize_t +STRINGLIB(count_char_no_maxcount)(const STRINGLIB_CHAR *s, Py_ssize_t n, + const STRINGLIB_CHAR p0) +/* A specialized function of count_char that does not cut off at a maximum. + As a result, the compiler is able to vectorize the loop. */ +{ + Py_ssize_t count = 0; + for (Py_ssize_t i = 0; i < n; i++) { + if (s[i] == p0) { + count++; + } + } + return count; +} + + Py_LOCAL_INLINE(Py_ssize_t) FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, const STRINGLIB_CHAR* p, Py_ssize_t m, @@ -773,6 +789,9 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, else if (mode == FAST_RSEARCH) return STRINGLIB(rfind_char)(s, n, p[0]); else { + if (maxcount == PY_SSIZE_T_MAX) { + return STRINGLIB(count_char_no_maxcount)(s, n, p[0]); + } return STRINGLIB(count_char)(s, n, p[0], maxcount); } }