namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+ namespace __detail
+ {
+ // TODO: this needs to be false for types with padding, e.g. __int20.
+ // TODO: should this be true only for integral, enum, and pointer types?
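+    // In other words: scalar types whose size is a power of two and at most
+    // eight bytes, so the value can always be held in a 64-bit integer.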
+ template<typename _Tp>
+ concept __waitable
+ = is_scalar_v<_Tp> && __builtin_popcountg(sizeof(_Tp)) == 1
+ && (sizeof(_Tp) <= sizeof(__UINT64_TYPE__));
+ }
+
+#if defined _GLIBCXX_HAVE_LINUX_FUTEX
namespace __detail
{
-#ifdef _GLIBCXX_HAVE_LINUX_FUTEX
-#define _GLIBCXX_HAVE_PLATFORM_WAIT 1
+ // Use futex syscall on int objects.
using __platform_wait_t = int;
inline constexpr size_t __platform_wait_alignment = 4;
+ }
+  // True for the subset of __waitable types that are statically known to be
+  // able to wait on the futex directly, never through a proxy wait.
+ template<typename _Tp>
+ inline constexpr bool __platform_wait_uses_type
+ = __detail::__waitable<_Tp>
+ && sizeof(_Tp) == sizeof(int) && alignof(_Tp) >= 4;
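+  // In practice, on Linux this means 4-byte scalar types with at least
+  // 4-byte alignment, e.g. int and unsigned int.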
#else
-// define _GLIBCX_HAVE_PLATFORM_WAIT and implement __platform_wait()
+// define _GLIBCXX_HAVE_PLATFORM_WAIT and implement __platform_wait()
// and __platform_notify() if there is a more efficient primitive supported
// by the platform (e.g. __ulock_wait()/__ulock_wake()) which is better than
// a mutex/condvar based wait.
+ namespace __detail
+ {
# if ATOMIC_LONG_LOCK_FREE == 2
using __platform_wait_t = unsigned long;
# else
using __platform_wait_t = unsigned int;
# endif
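+      // Use max(size, alignment) so that waitable objects are always
+      // size-aligned, as futex-like syscalls typically require.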
inline constexpr size_t __platform_wait_alignment
- = __alignof__(__platform_wait_t);
-#endif
+ = sizeof(__platform_wait_t) < __alignof__(__platform_wait_t)
+ ? __alignof__(__platform_wait_t) : sizeof(__platform_wait_t);
} // namespace __detail
- template<typename _Tp>
- inline constexpr bool __platform_wait_uses_type
-#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
- = is_scalar_v<_Tp>
- && ((sizeof(_Tp) == sizeof(__detail::__platform_wait_t))
- && (alignof(_Tp) >= __detail::__platform_wait_alignment));
-#else
- = false;
+ // This must be false for the general case where we don't know of any
+ // futex-like syscall.
+ template<typename>
+ inline constexpr bool __platform_wait_uses_type = false;
#endif
namespace __detail
return __builtin_memcmp(&__a, &__b, sizeof(_Tp)) == 0;
}
- // lightweight std::optional<__platform_wait_t>
+  // Storage for up to 64 bits of value; the contents should be treated as
+  // opaque bits.
+ using __wait_value_type = __UINT64_TYPE__;
+
+ // lightweight std::optional<__wait_value_type>
struct __wait_result_type
{
- __platform_wait_t _M_val;
+ __wait_value_type _M_val;
unsigned char _M_has_val : 1; // _M_val value was loaded before return.
unsigned char _M_timeout : 1; // Waiting function ended with timeout.
unsigned char _M_unused : 6; // padding
enum class __wait_flags : __UINT_LEAST32_TYPE__
{
- __abi_version = 0,
- __proxy_wait = 1,
+ __abi_version = 0x00000000,
+ // currently unused = 1,
__track_contention = 2,
__do_spin = 4,
__spin_only = 8, // Ignored unless __do_spin is also set.
- // __abi_version_mask = 0xffff0000,
+ // __abi_version_mask = 0xff000000,
};
[[__gnu__::__always_inline__]]
{
__wait_flags _M_flags;
int _M_order = __ATOMIC_ACQUIRE;
- __platform_wait_t _M_old = 0;
+ __wait_value_type _M_old = 0;
void* _M_wait_state = nullptr;
+ const void* _M_obj = nullptr; // The address of the object to wait on.
+ unsigned char _M_obj_size = 0; // The size of that object.
// Test whether _M_flags & __flags is non-zero.
bool
explicit
__wait_args(const _Tp* __addr, bool __bare_wait = false) noexcept
: __wait_args_base{ _S_flags_for(__addr, __bare_wait) }
- { }
+ {
+ _M_obj = __addr; // Might be replaced by _M_setup_wait
+ if constexpr (__waitable<_Tp>)
+ // __wait_impl might be able to wait directly on __addr
+	    // instead of using a proxy, depending on the object's size.
+ _M_obj_size = sizeof(_Tp);
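+	  // Otherwise _M_obj_size stays 0, which forces a proxy wait.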
+ }
__wait_args(const __platform_wait_t* __addr, __platform_wait_t __old,
int __order, bool __bare_wait = false) noexcept
- : __wait_args_base{ _S_flags_for(__addr, __bare_wait), __order, __old }
- { }
+ : __wait_args(__addr, __bare_wait)
+ {
+ _M_order = __order;
+	// Zero-extend, because comparisons elsewhere assume _M_old holds
+	// the 64-bit zero-extension of the stored value.
+	_M_old = static_cast<make_unsigned_t<__platform_wait_t>>(__old);
+ }
__wait_args(const __wait_args&) noexcept = default;
__wait_args& operator=(const __wait_args&) noexcept = default;
- template<typename _ValFn,
- typename _Tp = decay_t<decltype(std::declval<_ValFn&>()())>>
+ template<typename _Tp, typename _ValFn>
_Tp
- _M_setup_wait(const void* __addr, _ValFn __vfn,
+ _M_setup_wait(const _Tp* __addr, _ValFn __vfn,
__wait_result_type __res = {})
{
- if constexpr (__platform_wait_uses_type<_Tp>)
- {
- // If the wait is not proxied, the value we check when waiting
- // is the value of the atomic variable itself.
+ static_assert(is_same_v<_Tp, decay_t<decltype(__vfn())>>);
- if (__res._M_has_val) // The previous wait loaded a recent value.
+ if (__res._M_has_val) // A previous wait loaded a recent value.
+ {
+ _M_old = __res._M_val;
+ if constexpr (!__platform_wait_uses_type<_Tp>)
{
- _M_old = __res._M_val;
- return __builtin_bit_cast(_Tp, __res._M_val);
+ // __res._M_val might be the value of a proxy wait object,
+ // not the value of *__addr. Call __vfn() to get new value.
+ return __vfn();
}
- else // Load the value from __vfn
+ // Not a proxy wait, so the value in __res._M_val was loaded
+ // from *__addr and we don't need to call __vfn().
+ else if constexpr (sizeof(_Tp) == sizeof(__UINT32_TYPE__))
+ return __builtin_bit_cast(_Tp, (__UINT32_TYPE__)_M_old);
+ else if constexpr (sizeof(_Tp) == sizeof(__UINT64_TYPE__))
+ return __builtin_bit_cast(_Tp, (__UINT64_TYPE__)_M_old);
+ else
{
- _Tp __val = __vfn();
- _M_old = __builtin_bit_cast(__platform_wait_t, __val);
- return __val;
+ static_assert(false); // Unsupported size
+ return {};
}
}
- else // It's a proxy wait and the proxy's _M_ver is used.
- {
- if (__res._M_has_val) // The previous wait loaded a recent value.
- _M_old = __res._M_val;
- else // Load _M_ver from the proxy (must happen before __vfn()).
- _M_load_proxy_wait_val(__addr);
- return __vfn();
- }
+
+ if constexpr (!__platform_wait_uses_type<_Tp>)
+ if (_M_setup_proxy_wait(__addr))
+ {
+ // We will use a proxy wait for this object.
+ // The library has set _M_obj and _M_obj_size and _M_old.
+ // Call __vfn to load the current value from *__addr
+ // (which must happen after the call to _M_setup_proxy_wait).
+ return __vfn();
+ }
+
+ // We will use a futex-like operation to wait on this object,
+ // and so can just load the value and store it into _M_old.
+ auto __val = __vfn();
+ // We have to consider various sizes, because a future libstdc++.so
+ // might enable non-proxy waits for additional sizes.
+ if constexpr (sizeof(_Tp) == sizeof(__UINT64_TYPE__))
+ _M_old = __builtin_bit_cast(__UINT64_TYPE__, __val);
+ else if constexpr (sizeof(_Tp) == sizeof(__UINT32_TYPE__))
+ _M_old = __builtin_bit_cast(__UINT32_TYPE__, __val);
+ else if constexpr (sizeof(_Tp) == sizeof(__UINT16_TYPE__))
+ _M_old = __builtin_bit_cast(__UINT16_TYPE__, __val);
+ else if constexpr (sizeof(_Tp) == sizeof(__UINT8_TYPE__))
+ _M_old = __builtin_bit_cast(__UINT8_TYPE__, __val);
+ else // _M_setup_proxy_wait should have returned true for this type!
+ __glibcxx_assert(false);
+ return __val;
}
private:
- // Populates _M_wait_state and _M_old from the proxy for __addr.
- void
- _M_load_proxy_wait_val(const void* __addr);
+ // Returns true if a proxy wait will be used for __addr, false otherwise.
+ // If true, _M_wait_state, _M_obj, _M_obj_size, and _M_old are set.
+ // If false, data members are unchanged.
+ bool
+ _M_setup_proxy_wait(const void* __addr);
template<typename _Tp>
static constexpr __wait_flags
__wait_flags __res = __abi_version | __do_spin;
if (!__bare_wait)
__res |= __track_contention;
- if constexpr (!__platform_wait_uses_type<_Tp>)
- __res |= __proxy_wait;
return __res;
}
};
// Wait on __addr while __pred(__vfn()) is false.
// If __bare_wait is false, increment a counter while waiting.
// For callers that keep their own count of waiters, use __bare_wait=true.
+ // The effect of __vfn() must be an atomic load from __addr and nothing else.
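+  // For example, std::atomic<T>::wait(__old) can be implemented by passing
+  // a predicate that compares the loaded value with __old and a __vfn that
+  // atomically loads *__addr.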
template<typename _Tp, typename _Pred, typename _ValFn>
void
__atomic_wait_address(const _Tp* __addr, _Pred&& __pred, _ValFn&& __vfn,
__detail::__platform_wait_t __old,
int __order, bool __bare_wait = false)
{
-#ifndef _GLIBCXX_HAVE_PLATFORM_WAIT
- __glibcxx_assert(false); // This function can't be used for proxy wait.
-#endif
+ // This function must not be used if __wait_impl might use a proxy wait:
+ __glibcxx_assert(__platform_wait_uses_type<__detail::__platform_wait_t>);
+
__detail::__wait_args __args{ __addr, __old, __order, __bare_wait };
// C++26 will not ignore the return value here
__detail::__wait_impl(__addr, __args);
#if __glibcxx_atomic_wait
#include <atomic>
#include <bits/atomic_timed_wait.h>
-#include <bits/functional_hash.h>
-#include <cstdint>
+#include <cstdint> // uint32_t, uint64_t
+#include <climits> // INT_MAX
+#include <cerrno> // errno, ETIMEDOUT, etc.
#include <bits/std_mutex.h> // std::mutex, std::__condvar
+#include <bits/functexcept.h> // __throw_system_error
+#include <bits/functional_hash.h>
#ifdef _GLIBCXX_HAVE_LINUX_FUTEX
-# include <cerrno>
-# include <climits>
# include <unistd.h>
# include <syscall.h>
-# include <bits/functexcept.h>
-# include <sys/time.h>
-#endif
-
-#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
-# ifndef _GLIBCXX_HAVE_PLATFORM_TIMED_WAIT
-// __waitable_state assumes that we consistently use the same implementation
-// (i.e. futex vs mutex+condvar) for timed and untimed waiting.
-# error "This configuration is not currently supported"
-# endif
+# include <sys/time.h> // timespec
+# define _GLIBCXX_HAVE_PLATFORM_WAIT 1
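+// Now defined here instead of in <bits/atomic_wait.h>; in this configuration
+// the platform wait primitive is the Linux futex.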
#endif
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
};
void
- __platform_wait(const int* __addr, int __val) noexcept
+ __platform_wait(const int* addr, int val, int /* obj_size */) noexcept
{
- auto __e = syscall (SYS_futex, __addr,
- static_cast<int>(__futex_wait_flags::__wait_private),
- __val, nullptr);
- if (!__e || errno == EAGAIN)
- return;
- if (errno != EINTR)
- __throw_system_error(errno);
+ if (syscall(SYS_futex, addr,
+ static_cast<int>(__futex_wait_flags::__wait_private),
+ val, nullptr))
+ if (errno != EAGAIN && errno != EINTR)
+ __throw_system_error(errno);
}
void
- __platform_notify(const int* __addr, bool __all) noexcept
+  __platform_notify(const int* addr, bool all, int /* obj_size */) noexcept
{
- syscall (SYS_futex, __addr,
- static_cast<int>(__futex_wait_flags::__wake_private),
- __all ? INT_MAX : 1);
+ syscall(SYS_futex, addr,
+ static_cast<int>(__futex_wait_flags::__wake_private),
+ all ? INT_MAX : 1);
}
-#endif
+
+ // returns true if wait ended before timeout
+ bool
+ __platform_wait_until(const __platform_wait_t* addr,
+ __platform_wait_t val,
+ const __wait_clock_t::time_point& __atime,
+ int /* obj_size */) noexcept
+ {
+ struct timespec timeout = chrono::__to_timeout_timespec(__atime);
+
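+    // FUTEX_WAIT_BITSET (unlike plain FUTEX_WAIT) interprets the timeout as
+    // an absolute time point, so no conversion to a relative duration is
+    // needed here.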
+ if (syscall(SYS_futex, addr,
+ static_cast<int>(__futex_wait_flags::__wait_bitset_private),
+ val, &timeout, nullptr,
+ static_cast<int>(__futex_wait_flags::__bitset_match_any)))
+ {
+ if (errno == ETIMEDOUT)
+ return false;
+ if (errno != EAGAIN && errno != EINTR)
+ __throw_system_error(errno);
+ }
+ return true;
+ }
+#endif // HAVE_LINUX_FUTEX
// The state used by atomic waiting and notifying functions.
struct __waitable_state
// Count of threads blocked waiting on this state.
alignas(_S_align) __platform_wait_t _M_waiters = 0;
-#ifndef _GLIBCXX_HAVE_PLATFORM_TIMED_WAIT
+#ifndef _GLIBCXX_HAVE_PLATFORM_WAIT
mutex _M_mtx;
// This type meets the Cpp17BasicLockable requirements.
// use this for waiting and notifying functions instead.
alignas(_S_align) __platform_wait_t _M_ver = 0;
-#ifndef _GLIBCXX_HAVE_PLATFORM_TIMED_WAIT
+#ifndef _GLIBCXX_HAVE_PLATFORM_WAIT
__condvar _M_cv;
#endif
__wait_result_type
__spin_impl(const __platform_wait_t* __addr, const __wait_args_base& __args)
{
- __platform_wait_t __val{};
+  __wait_value_type wval{};
for (auto __i = 0; __i < __atomic_spin_count; ++__i)
{
- __atomic_load(__addr, &__val, __args._M_order);
- if (__val != __args._M_old)
- return { ._M_val = __val, ._M_has_val = true, ._M_timeout = false };
+	  // Zero-extend the loaded value: _M_old is stored zero-extended, so
+	  // a sign-extending conversion here would make equal values compare
+	  // unequal.
+	  wval = static_cast<make_unsigned_t<__platform_wait_t>>(
+		   __atomic_load_n(__addr, __args._M_order));
+ if (wval != __args._M_old)
+ return { ._M_val = wval, ._M_has_val = true, ._M_timeout = false };
if (__i < __atomic_spin_count_relax)
__thread_relax();
else
__thread_yield();
}
- return { ._M_val = __val, ._M_has_val = true, ._M_timeout = true };
+ return { ._M_val = wval, ._M_has_val = true, ._M_timeout = true };
}
inline __waitable_state*
return static_cast<__waitable_state*>(args._M_wait_state);
}
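+  // Decide whether the object described by `args` must use a proxy wait.
+  // For example, with the Linux futex a 4-byte object is waited on directly,
+  // while 1-, 2-, and 8-byte objects fall through to the proxy wait.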
+ [[gnu::always_inline]]
+ inline bool
+  use_proxy_wait([[maybe_unused]] const __wait_args_base& args,
+		 const void* /* addr */)
+ {
+#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
+ if constexpr (__platform_wait_uses_type<uint32_t>)
+ if (args._M_obj_size == sizeof(uint32_t))
+ return false;
+
+ if constexpr (__platform_wait_uses_type<uint64_t>)
+ if (args._M_obj_size == sizeof(uint64_t))
+ return false;
+
+ // __wait_args::_M_old can only hold 64 bits, so larger types
+ // must always use a proxy wait.
+ if (args._M_obj_size > sizeof(uint64_t))
+ return true;
+
+ // __wait_args::_M_setup_wait only knows how to store 1/2/4/8 byte types,
+ // so anything else must always use a proxy wait.
+ if (__builtin_popcountg(args._M_obj_size) != 1)
+ return true;
+#endif
+
+ // Currently use proxy wait for everything else:
+ return true;
+ }
+
} // namespace
-// Called for a proxy wait
-void
-__wait_args::_M_load_proxy_wait_val(const void* addr)
+// Return false (and don't change any data members) if we can do a non-proxy
+// wait for the object of size `_M_obj_size` at address `addr`.
+// Otherwise, the object at addr needs to use a proxy wait. Set _M_wait_state,
+// set _M_obj and _M_obj_size to refer to the _M_wait_state->_M_ver proxy,
+// load the current value from _M_obj and store it in _M_old, then return true.
+bool
+__wait_args::_M_setup_proxy_wait(const void* addr)
{
- // __glibcxx_assert( *this & __wait_flags::__proxy_wait );
+ if (!use_proxy_wait(*this, addr)) // We can wait on this address directly.
+ {
+ // Ensure the caller set _M_obj correctly, as that's what we'll wait on:
+ __glibcxx_assert(_M_obj == addr);
+ return false;
+ }
- // We always need a waitable state for proxy waits.
+ // This will be a proxy wait, so get a waitable state.
auto state = set_wait_state(addr, *this);
+ // The address we will wait on is the version count of the waitable state:
+ _M_obj = &state->_M_ver;
+ // __wait_impl and __wait_until_impl need to know this size:
+ _M_obj_size = sizeof(state->_M_ver);
+
// Read the value of the _M_ver counter.
- __atomic_load(&state->_M_ver, &_M_old, __ATOMIC_ACQUIRE);
+  // Zero-extended, for consistency with the loads in __spin_impl etc.
+  _M_old = static_cast<make_unsigned_t<__platform_wait_t>>(
+	     __atomic_load_n(&state->_M_ver, __ATOMIC_ACQUIRE));
+
+ return true;
}
__wait_result_type
-__wait_impl(const void* __addr, __wait_args_base& __args)
+__wait_impl([[maybe_unused]] const void* __addr, __wait_args_base& __args)
{
- auto __state = static_cast<__waitable_state*>(__args._M_wait_state);
-
- const __platform_wait_t* __wait_addr;
-
- if (__args & __wait_flags::__proxy_wait)
- __wait_addr = &__state->_M_ver;
- else
- __wait_addr = static_cast<const __platform_wait_t*>(__addr);
+ auto* __wait_addr = static_cast<const __platform_wait_t*>(__args._M_obj);
if (__args & __wait_flags::__do_spin)
{
if (__args & __wait_flags::__track_contention)
set_wait_state(__addr, __args); // scoped_wait needs a __waitable_state
scoped_wait s(__args);
- __platform_wait(__wait_addr, __args._M_old);
+ __platform_wait(__wait_addr, __args._M_old, __args._M_obj_size);
// We haven't loaded a new value so return _M_has_val=false
return { ._M_val = __args._M_old, ._M_has_val = false, ._M_timeout = false };
#else
__atomic_load(__wait_addr, &__val, __args._M_order);
if (__val == __args._M_old)
{
+ auto __state = static_cast<__waitable_state*>(__args._M_wait_state);
__state->_M_cv.wait(__state->_M_mtx);
return { ._M_val = __val, ._M_has_val = false, ._M_timeout = false };
}
}
void
-__notify_impl(const void* __addr, [[maybe_unused]] bool __all,
+__notify_impl([[maybe_unused]] const void* __addr, [[maybe_unused]] bool __all,
const __wait_args_base& __args)
{
- auto __state = static_cast<__waitable_state*>(__args._M_wait_state);
- if (!__state)
- __state = &__waitable_state::_S_state_for(__addr);
+ const bool __track_contention = __args & __wait_flags::__track_contention;
+ const bool proxy_wait = use_proxy_wait(__args, __addr);
+
+ [[maybe_unused]] auto* __wait_addr
+ = static_cast<const __platform_wait_t*>(__addr);
+
+#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
+ // Check whether it would be a non-proxy wait for this object.
+  // This condition must match the one in _M_setup_proxy_wait to ensure that
+ // the address used for the notify matches the one used for the wait.
+ if (!proxy_wait)
+ {
+ if (__track_contention)
+ if (!__waitable_state::_S_state_for(__addr)._M_waiting())
+ return;
+
+ __platform_notify(__wait_addr, __all, __args._M_obj_size);
+ return;
+ }
+#endif
+
+ // Either a proxy wait or we don't have platform wait/wake primitives.
- [[maybe_unused]] const __platform_wait_t* __wait_addr;
+ auto __state = &__waitable_state::_S_state_for(__addr);
// Lock mutex so that proxied waiters cannot race with incrementing _M_ver
// and see the old value, then sleep after the increment and notify_all().
lock_guard __l{ *__state };
- if (__args & __wait_flags::__proxy_wait)
+ if (proxy_wait)
{
- // Waiting for *__addr is actually done on the proxy's _M_ver.
- __wait_addr = &__state->_M_ver;
-
// Increment _M_ver so that waiting threads see something changed.
-    // This has to be atomic because the load in _M_load_proxy_wait_val
+    // This has to be atomic because the load in _M_setup_proxy_wait
// is done without the mutex locked.
// they can re-evaluate their conditions to see if they should
// stop waiting or should wait again.
__all = true;
+
+ __wait_addr = &__state->_M_ver;
}
- else // Use the atomic variable's own address.
- __wait_addr = static_cast<const __platform_wait_t*>(__addr);
- if (__args & __wait_flags::__track_contention)
+ if (__track_contention)
{
if (!__state->_M_waiting())
return;
}
#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
- __platform_notify(__wait_addr, __all);
+ __platform_notify(__wait_addr, __all, sizeof(__state->_M_ver));
#else
__state->_M_cv.notify_all();
#endif
namespace
{
-#ifdef _GLIBCXX_HAVE_LINUX_FUTEX
-// returns true if wait ended before timeout
-bool
-__platform_wait_until(const __platform_wait_t* __addr,
- __platform_wait_t __old,
- const __wait_clock_t::time_point& __atime) noexcept
-{
- struct timespec __rt = chrono::__to_timeout_timespec(__atime);
-
- if (syscall (SYS_futex, __addr,
- static_cast<int>(__futex_wait_flags::__wait_bitset_private),
- __old, &__rt, nullptr,
- static_cast<int>(__futex_wait_flags::__bitset_match_any)))
- {
- if (errno == ETIMEDOUT)
- return false;
- if (errno != EINTR && errno != EAGAIN)
- __throw_system_error(errno);
- }
- return true;
-}
-#endif // HAVE_LINUX_FUTEX
-
-#ifndef _GLIBCXX_HAVE_PLATFORM_TIMED_WAIT
+#ifndef _GLIBCXX_HAVE_PLATFORM_WAIT
bool
__cond_wait_until(__condvar& __cv, mutex& __mx,
const __wait_clock_t::time_point& __atime)
__cv.wait_until(__mx, __ts);
return __wait_clock_t::now() < __atime;
}
-#endif // ! HAVE_PLATFORM_TIMED_WAIT
+#endif // ! HAVE_PLATFORM_WAIT
// Unlike __spin_impl, does not always return _M_has_val == true.
// If the deadline has already passed then no fresh value is loaded.
return __res;
}
- __atomic_load(__addr, &__res._M_val, __args._M_order);
+  // Zero-extend, to match how _M_old and _M_val are stored elsewhere.
+  __res._M_val = static_cast<make_unsigned_t<__platform_wait_t>>(
+		   __atomic_load_n(__addr, __args._M_order));
__res._M_has_val = true;
if (__res._M_val != __args._M_old)
{
} // namespace
__wait_result_type
-__wait_until_impl(const void* __addr, __wait_args_base& __args,
+__wait_until_impl([[maybe_unused]] const void* __addr, __wait_args_base& __args,
const __wait_clock_t::duration& __time)
{
const __wait_clock_t::time_point __atime(__time);
- auto __state = static_cast<__waitable_state*>(__args._M_wait_state);
- const __platform_wait_t* __wait_addr;
- if (__args & __wait_flags::__proxy_wait)
- __wait_addr = &__state->_M_ver;
- else
- __wait_addr = static_cast<const __platform_wait_t*>(__addr);
+ auto* __wait_addr = static_cast<const __platform_wait_t*>(__args._M_obj);
if (__args & __wait_flags::__do_spin)
{
return __res;
}
-#ifdef _GLIBCXX_HAVE_PLATFORM_TIMED_WAIT
+#ifdef _GLIBCXX_HAVE_PLATFORM_WAIT
if (__args & __wait_flags::__track_contention)
- set_wait_state(__addr, __args);
+ set_wait_state(__addr, __args); // scoped_wait needs a __waitable_state
scoped_wait s(__args);
- bool timeout = !__platform_wait_until(__wait_addr, __args._M_old, __atime);
+ bool timeout = !__platform_wait_until(__wait_addr, __args._M_old, __atime,
+ __args._M_obj_size);
return { ._M_val = __args._M_old, ._M_has_val = false, ._M_timeout = timeout };
#else
waiter_lock l(__args);
__atomic_load(__wait_addr, &__val, __args._M_order);
if (__val == __args._M_old)
{
+ auto __state = static_cast<__waitable_state*>(__args._M_wait_state);
bool timeout = !__cond_wait_until(__state->_M_cv, __state->_M_mtx, __atime);
return { ._M_val = __val, ._M_has_val = false, ._M_timeout = timeout };
}