From: Jonathan Wakely
Date: Thu, 27 Nov 2025 11:40:46 +0000 (+0000)
Subject: libstdc++: Fix spinloop in atomic timed waiting function [PR122878]
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8aefa1855b2b69abec5627b9a6f7d9ee9fab4f85;p=thirdparty%2Fgcc.git

libstdc++: Fix spinloop in atomic timed waiting function [PR122878]

The __spin_until_impl function was presumably intended to just spin for
a short time, then give up and let the caller wait on a futex or
condvar. However, __spin_until_impl never stops spinning unless either
the value changes or the timeout is reached. This means that when
__spin_until_impl returns, the caller should return immediately,
because either the value we were waiting for has changed or the timeout
has been reached. So __wait_until_impl should never block on a futex or
condvar. However, the check on the result of __spin_until_impl only
returned early if the value had changed (i.e. !__res._M_timeout). So if
the timeout occurred, __wait_until_impl would fall through and block on
the futex/condvar, even though the deadline had already passed. This
was causing a major performance regression in the timed waiting
functions of std::counting_semaphore.

The simplest fix is to remove __spin_until_impl entirely, just calling
__spin_impl to spin a small, finite number of times and then returning
immediately if either the value changed or the timeout was reached.
This ensures that we don't block on the futex/condvar unnecessarily.

Removing __spin_until_impl also has the advantage that we no longer
keep calling steady_clock::now() on every iteration to check for a
timeout, which was also adding significant overhead to the timed
waiting functions.

libstdc++-v3/ChangeLog:

	PR libstdc++/122878
	* src/c++20/atomic.cc (__spin_until_impl): Remove.
	(__wait_until_impl): Use __spin_impl instead of
	__spin_until_impl and return if timeout is reached after
	spinning.

Reviewed-by: Tomasz Kamiński
---

diff --git a/libstdc++-v3/src/c++20/atomic.cc b/libstdc++-v3/src/c++20/atomic.cc
index fdd67d83476..16fd91b7d7a 100644
--- a/libstdc++-v3/src/c++20/atomic.cc
+++ b/libstdc++-v3/src/c++20/atomic.cc
@@ -455,49 +455,6 @@ __cond_wait_until(__condvar& __cv, mutex& __mx,
   return __wait_clock_t::now() < __atime;
 }
 #endif // ! HAVE_PLATFORM_WAIT
-
-// Unlike __spin_impl, does not always return _M_has_val == true.
-// If the deadline has already passed then no fresh value is loaded.
-__wait_result_type
-__spin_until_impl(const __platform_wait_t* __addr,
-                  const __wait_args_base& __args,
-                  const __wait_clock_t::time_point& __deadline)
-{
-  using namespace literals::chrono_literals;
-
-  __wait_result_type __res{};
-  auto __t0 = __wait_clock_t::now();
-  auto __now = __t0;
-  for (; __now < __deadline; __now = __wait_clock_t::now())
-    {
-      auto __elapsed = __now - __t0;
-#ifndef _GLIBCXX_NO_SLEEP
-      if (__elapsed > 128ms)
-        this_thread::sleep_for(64ms);
-      else if (__elapsed > 64us)
-        this_thread::sleep_for(__elapsed / 2);
-      else
-#endif
-      if (__elapsed > 4us)
-        __thread_yield();
-      else
-        {
-          __res = __detail::__spin_impl(__addr, __args);
-          if (!__res._M_timeout)
-            return __res;
-        }
-
-      __res._M_val = __atomic_load_n(__addr, __args._M_order);
-      __res._M_has_val = true;
-      if (__res._M_val != __args._M_old)
-        {
-          __res._M_timeout = false;
-          return __res;
-        }
-    }
-  __res._M_timeout = true;
-  return __res;
-}
 } // namespace

 __wait_result_type
@@ -509,11 +466,13 @@ __wait_until_impl([[maybe_unused]] const void* __addr, __wait_args_base& __args,

   if (__args & __wait_flags::__do_spin)
     {
-      auto __res = __detail::__spin_until_impl(__wait_addr, __args, __atime);
+      auto __res = __detail::__spin_impl(__wait_addr, __args);
       if (!__res._M_timeout)
        return __res;
       if (__args & __wait_flags::__spin_only)
        return __res;
+      if (__wait_clock_t::now() >= __atime)
+        return __res;
     }

 #ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
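
For context, the kind of caller affected by this bug is a timed semaphore
wait. The following stand-alone sketch is not part of the commit; the loop
count and timeout are chosen only for illustration, and it assumes that
std::counting_semaphore::try_acquire_for reaches the out-of-line
__wait_until_impl on the affected GCC versions. Every wait below must time
out because the semaphore never receives a token, so it exercises exactly
the timed-out spin path described above.

// Hypothetical micro-benchmark (not part of this commit): every
// try_acquire_for call must time out, so each one goes through the
// timed atomic waiting path.
#include <chrono>
#include <cstdio>
#include <semaphore>

int main()
{
  using namespace std::chrono;

  std::counting_semaphore<> sem(0);   // zero tokens, so timed waits always fail

  auto start = steady_clock::now();
  for (int i = 0; i < 100; ++i)
    (void) sem.try_acquire_for(1ms);  // timed wait that must hit its deadline
  auto total = duration_cast<milliseconds>(steady_clock::now() - start);

  // The loop requests roughly 100ms of timeouts in total; any large excess
  // over that is overhead in the timed waiting implementation.
  std::printf("elapsed: %lld ms\n", static_cast<long long>(total.count()));
}

Built with -std=c++20, comparing the reported time before and after this
change would be one way to observe the regression described above, under
the assumptions stated for this sketch.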