From: mpage Date: Wed, 9 Apr 2025 17:34:12 +0000 (-0700) Subject: gh-129987: Disable GCC SLP autovectorization for the interpreter loop on x86-64 ... X-Git-Tag: v3.14.0b1~559 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1f5682f3a27516833f7c317707dd359280dba6e7;p=thirdparty%2FPython%2Fcpython.git gh-129987: Disable GCC SLP autovectorization for the interpreter loop on x86-64 (#132295) The SLP autovectorizer can cause poor code generation for opcode dispatch, negating any benefit we get from vectorization elsewhere in the interpreter loop. --- diff --git a/Python/ceval.c b/Python/ceval.c index a59b2b7a1686..47d068edac27 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -948,7 +948,18 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch) #include "generated_cases.c.h" #endif -PyObject* _Py_HOT_FUNCTION +#if (defined(__GNUC__) && !defined(__clang__)) && defined(__x86_64__) +/* + * gh-129987: The SLP autovectorizer can cause poor code generation for opcode + * dispatch, negating any benefit we get from vectorization elsewhere in the + * interpreter loop. + */ +#define DONT_SLP_VECTORIZE __attribute__((optimize ("no-tree-slp-vectorize"))) +#else +#define DONT_SLP_VECTORIZE +#endif + +PyObject* _Py_HOT_FUNCTION DONT_SLP_VECTORIZE _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { _Py_EnsureTstateNotNULL(tstate);