From: Daniele Parmeggiani <8658291+dpdani@users.noreply.github.com> Date: Mon, 23 Mar 2026 20:55:06 +0000 (+0100) Subject: gh-135871: Reload lock internal state while spinning in `PyMutex_LockTimed` (gh-146064) X-Git-Tag: v3.15.0a8~201 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=daa159f98b3689ae6a587bfb978b931762f9dbc9;p=thirdparty%2FPython%2Fcpython.git gh-135871: Reload lock internal state while spinning in `PyMutex_LockTimed` (gh-146064) Add atomic loads in the slow path of PyMutex to increase the number of lock acquisitions per second that threads can make on a shared mutex. --- diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst new file mode 100644 index 000000000000..29103e469064 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst @@ -0,0 +1 @@ +Improve multithreaded scaling of PyMutex in low-contention scenarios by reloading the lock's internal state, without slowing down high-contention scenarios. diff --git a/Python/lock.c b/Python/lock.c index ad97bfd93c84..752a5899e088 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -27,8 +27,10 @@ static const PyTime_t TIME_TO_BE_FAIR_NS = 1000*1000; // enabled. #if Py_GIL_DISABLED static const int MAX_SPIN_COUNT = 40; +static const int RELOAD_SPIN_MASK = 3; #else static const int MAX_SPIN_COUNT = 0; +static const int RELOAD_SPIN_MASK = 1; #endif struct mutex_entry { @@ -79,6 +81,16 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) }; Py_ssize_t spin_count = 0; +#ifdef Py_GIL_DISABLED + // Using thread-id as a way of reducing contention further in the reload below. + // It adds a pseudo-random starting offset to the recurrence, so that threads + // are less likely to try and run compare-exchange at the same time. 
+ // The lower bits of platform thread ids are unlikely to be random, + // hence the right shift. + const Py_ssize_t tid = (Py_ssize_t)(_Py_ThreadId() >> 12); +#else + const Py_ssize_t tid = 0; +#endif for (;;) { if ((v & _Py_LOCKED) == 0) { // The lock is unlocked. Try to grab it. @@ -92,6 +104,9 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) // Spin for a bit. _Py_yield(); spin_count++; + if (((spin_count + tid) & RELOAD_SPIN_MASK) == 0) { + v = _Py_atomic_load_uint8_relaxed(&m->_bits); + } continue; }