Use C11-like atomics instead of plain memory accesses in x86 lock elision.
author      Torvald Riegel <triegel@redhat.com>
            Wed, 30 Nov 2016 16:53:11 +0000 (17:53 +0100)
committer   Torvald Riegel <triegel@redhat.com>
            Mon, 5 Dec 2016 15:19:43 +0000 (16:19 +0100)
This uses atomic operations to access lock elision metadata that is accessed
concurrently (i.e., the adapt_count fields).  The data is smaller than a
word but is accessed only with atomic loads and stores; therefore, we add
support for atomic loads and stores of these smaller sizes too.
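
As an illustration (not part of the commit), here is a minimal sketch of the
adapt_count access pattern this change switches to, written with C11
<stdatomic.h> instead of glibc's internal include/atomic.h macros; the names
adapt_count and should_try_elision are made up for the example.  Relaxed MO
suffices because the counter is only a hint, and the separate load and store
(rather than an atomic read-modify-write) may lose concurrent decrements,
which is harmless here.

#include <stdatomic.h>

/* Hypothetical per-lock hint counter; glibc keeps this as a plain short
   and accesses it through atomic_load_relaxed/atomic_store_relaxed.  */
static _Atomic short adapt_count;

static int
should_try_elision (void)
{
  /* Relaxed load: the value is only a hint.  */
  if (atomic_load_explicit (&adapt_count, memory_order_relaxed) <= 0)
    return 1;
  /* Separate relaxed load and store; lost updates are acceptable.  */
  atomic_store_explicit (&adapt_count,
                         atomic_load_explicit (&adapt_count,
                                               memory_order_relaxed) - 1,
                         memory_order_relaxed);
  return 0;
}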

* include/atomic.h (__atomic_check_size_ls): New.
(atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
atomic_store_release): Use it.
* sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
(elision_adapt, ELIDE_LOCK): Use atomics.
* sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
atomics and improve code comments.
* sysdeps/unix/sysv/linux/x86/elision-trylock.c
(__lll_trylock_elision): Likewise.

ChangeLog
include/atomic.h
sysdeps/unix/sysv/linux/x86/elision-lock.c
sysdeps/unix/sysv/linux/x86/elision-trylock.c
sysdeps/x86/elide.h

index 6cbb0a8b98970647ab56001af1e6d2748ba3c013..9c0aaa6e431452b69aeb077d5b073e565d3867e4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2016-12-05  Torvald Riegel  <triegel@redhat.com>
+
+       * include/atomic.h (__atomic_check_size_ls): New.
+       (atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
+       atomic_store_release): Use it.
+       * sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
+       (elision_adapt, ELIDE_LOCK): Use atomics.
+       * sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
+       atomics and improve code comments.
+       * sysdeps/unix/sysv/linux/x86/elision-trylock.c
+       (__lll_trylock_elision): Likewise.
+
 2016-12-04  Samuel Thibault  <samuel.thibault@ens-lyon.org>
 
        * hurd/hurd.h: Cast errno constants to error_t to fix usage in C++
index c8b46649c5285eb154f49768b29df0a4f8dde8c0..d14cbc5ca89d6acd75d780d41e55969a52e5c468 100644
--- a/include/atomic.h
+++ b/include/atomic.h
@@ -550,6 +550,20 @@ void __atomic_link_error (void);
    if (sizeof (*mem) != 4)                                                   \
      __atomic_link_error ();
 # endif
+/* We additionally provide 8b and 16b atomic loads and stores; we do not yet
+   need other atomic operations of such sizes, and restricting the support to
+   loads and stores makes this easier for archs that do not have native
+   support for atomic operations to less-than-word-sized data.  */
+# if __HAVE_64B_ATOMICS == 1
+#  define __atomic_check_size_ls(mem) \
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4)   \
+       && (sizeof (*mem) != 8))                                                      \
+     __atomic_link_error ();
+# else
+#  define __atomic_check_size_ls(mem) \
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4)    \
+     __atomic_link_error ();
+# endif
 
 # define atomic_thread_fence_acquire() \
   __atomic_thread_fence (__ATOMIC_ACQUIRE)
@@ -559,18 +573,20 @@ void __atomic_link_error (void);
   __atomic_thread_fence (__ATOMIC_SEQ_CST)
 
 # define atomic_load_relaxed(mem) \
-  ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); })
+  ({ __atomic_check_size_ls((mem));                                          \
+     __atomic_load_n ((mem), __ATOMIC_RELAXED); })
 # define atomic_load_acquire(mem) \
-  ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
+  ({ __atomic_check_size_ls((mem));                                          \
+     __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
 
 # define atomic_store_relaxed(mem, val) \
   do {                                                                       \
-    __atomic_check_size((mem));                                                      \
+    __atomic_check_size_ls((mem));                                           \
     __atomic_store_n ((mem), (val), __ATOMIC_RELAXED);                       \
   } while (0)
 # define atomic_store_release(mem, val) \
   do {                                                                       \
-    __atomic_check_size((mem));                                                      \
+    __atomic_check_size_ls((mem));                                           \
     __atomic_store_n ((mem), (val), __ATOMIC_RELEASE);                       \
   } while (0)
 
index 5e66960123345b1afd97c81ac6c45c0e4505b1f5..c05ade4722f131c605148b727e4e8550053ddb9a 100644
--- a/sysdeps/unix/sysv/linux/x86/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/x86/elision-lock.c
 int
 __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 {
-  if (*adapt_count <= 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.  */
+  if (atomic_load_relaxed (adapt_count) <= 0)
     {
       unsigned status;
       int try_xbegin;
@@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
                        && _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
                {
                  /* Right now we skip here.  Better would be to wait a bit
-                    and retry.  This likely needs some spinning.  */
-                 if (*adapt_count != aconf.skip_lock_busy)
-                   *adapt_count = aconf.skip_lock_busy;
+                    and retry.  This likely needs some spinning.  See
+                    above for why relaxed MO is sufficient.  */
+                 if (atomic_load_relaxed (adapt_count)
+                     != aconf.skip_lock_busy)
+                   atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
                }
              /* Internal abort.  There is no chance for retry.
                 Use the normal locking and next time use lock.
-                Be careful to avoid writing to the lock.  */
-             else if (*adapt_count != aconf.skip_lock_internal_abort)
-               *adapt_count = aconf.skip_lock_internal_abort;
+                Be careful to avoid writing to the lock.  See above for why
+                relaxed MO is sufficient.  */
+             else if (atomic_load_relaxed (adapt_count)
+                 != aconf.skip_lock_internal_abort)
+               atomic_store_relaxed (adapt_count,
+                   aconf.skip_lock_internal_abort);
              break;
            }
        }
@@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
     {
       /* Use a normal lock until the threshold counter runs out.
         Lost updates possible.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+         atomic_load_relaxed (adapt_count) - 1);
     }
 
   /* Use a normal lock here.  */
index 65d9c18584412c6b8a2d01c032b7f6b1d18e33e4..2d44f50426a52cdc1a6ee1f4d525a260b34d3eed 100644
--- a/sysdeps/unix/sysv/linux/x86/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/x86/elision-trylock.c
@@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
      return an error.  */
   _xabort (_ABORT_NESTED_TRYLOCK);
 
-  /* Only try a transaction if it's worth it.  */
-  if (*adapt_count <= 0)
+  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
+     why we need atomic accesses.  Relaxed MO is sufficient because this is
+     just a hint.  */
+  if (atomic_load_relaxed (adapt_count) <= 0)
     {
       unsigned status;
 
@@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short *adapt_count)
       if (!(status & _XABORT_RETRY))
         {
           /* Internal abort.  No chance for retry.  For future
-             locks don't try speculation for some time.  */
-          if (*adapt_count != aconf.skip_trylock_internal_abort)
-            *adapt_count = aconf.skip_trylock_internal_abort;
+             locks don't try speculation for some time.  See above for MO.  */
+          if (atomic_load_relaxed (adapt_count)
+              != aconf.skip_lock_internal_abort)
+            atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort);
         }
       /* Could do some retries here.  */
     }
   else
     {
-      /* Lost updates are possible, but harmless.  */
-      (*adapt_count)--;
+      /* Lost updates are possible but harmless (see above).  */
+      atomic_store_relaxed (adapt_count,
+         atomic_load_relaxed (adapt_count) - 1);
     }
 
   return lll_trylock (*futex);
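
For context on the elide.h changes below: the commit only changes how
adapt_count is read and written, not the elision pattern itself.  The
following is a hedged, self-contained sketch of that pattern using the RTM
intrinsics from <immintrin.h> (build with -mrtm); lock_word, elide_or_lock,
and elide_unlock are invented names, and the fallback spin lock is purely
illustrative.

#include <immintrin.h>          /* _xbegin, _xend, _xabort, _XBEGIN_STARTED.  */

static int lock_word;           /* 0 = free, 1 = taken (assumption).  */

static void
real_lock (void)
{
  /* Trivial spin lock as the non-elided fallback; illustration only.  */
  while (__atomic_exchange_n (&lock_word, 1, __ATOMIC_ACQUIRE) != 0)
    ;
}

static void
real_unlock (void)
{
  __atomic_store_n (&lock_word, 0, __ATOMIC_RELEASE);
}

static void
elide_or_lock (void)
{
  unsigned int status = _xbegin ();
  if (status == _XBEGIN_STARTED)
    {
      /* Inside the transaction: if the lock is observed free, run the
         critical section transactionally and leave the lock word untouched.  */
      if (lock_word == 0)
        return;
      /* Lock busy: abort with an explicit code so the fallback path can
         tell this apart from other abort reasons.  */
      _xabort (0xff);
    }
  /* Transaction did not start or was aborted; status carries the abort
     reason, which the real code uses to adapt adapt_count.  Fall back.  */
  real_lock ();
}

static void
elide_unlock (void)
{
  /* If the lock word is still free, we did not take the real lock, so we
     must still be inside the transaction started above: commit it.  */
  if (lock_word == 0)
    _xend ();
  else
    real_unlock ();
}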
index 8691e6673de9fdce25c83a9956cb860a6cec03e0..f7d5220c17a95e43de5f84ad01704f5dba0d3a8d 100644
--- a/sysdeps/x86/elide.h
+++ b/sysdeps/x86/elide.h
@@ -20,8 +20,8 @@
 
 #include <hle.h>
 #include <elision-conf.h>
+#include <atomic.h>
 
-#define ACCESS_ONCE(x) (* (volatile typeof(x) *) &(x))
 
 /* Adapt elision with ADAPT_COUNT and STATUS and decide retries.  */
 
@@ -35,28 +35,35 @@ elision_adapt(signed char *adapt_count, unsigned int status)
     {
       /* Right now we skip here.  Better would be to wait a bit
         and retry.  This likely needs some spinning. Be careful
-        to avoid writing the lock.  */
-      if (*adapt_count != __elision_aconf.skip_lock_busy)
-       ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_busy;
+        to avoid writing the lock.
+        Using relaxed MO and separate atomic accesses is sufficient because
+        adapt_count is just a hint.  */
+      if (atomic_load_relaxed (adapt_count) != __elision_aconf.skip_lock_busy)
+       atomic_store_relaxed (adapt_count, __elision_aconf.skip_lock_busy);
     }
   /* Internal abort.  There is no chance for retry.
      Use the normal locking and next time use lock.
-     Be careful to avoid writing to the lock.  */
-  else if (*adapt_count != __elision_aconf.skip_lock_internal_abort)
-    ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_internal_abort;
+     Be careful to avoid writing to the lock.  See above for MO.  */
+  else if (atomic_load_relaxed (adapt_count)
+      != __elision_aconf.skip_lock_internal_abort)
+    atomic_store_relaxed (adapt_count,
+       __elision_aconf.skip_lock_internal_abort);
   return true;
 }
 
 /* is_lock_free must be executed inside the transaction */
 
 /* Returns true if lock defined by IS_LOCK_FREE was elided.
-   ADAPT_COUNT is a pointer to per-lock state variable. */
+   ADAPT_COUNT is a per-lock state variable; it must be accessed atomically
+   to avoid data races but is just a hint, so using relaxed MO and separate
+   atomic loads and stores instead of atomic read-modify-write operations is
+   sufficient.  */
 
 #define ELIDE_LOCK(adapt_count, is_lock_free)                  \
   ({                                                           \
     int ret = 0;                                               \
                                                                \
-    if ((adapt_count) <= 0)                                    \
+    if (atomic_load_relaxed (&(adapt_count)) <= 0)             \
       {                                                                \
         for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \
           {                                                    \
@@ -75,12 +82,13 @@ elision_adapt(signed char *adapt_count, unsigned int status)
           }                                                    \
       }                                                                \
     else                                                       \
-      (adapt_count)--; /* missing updates ok */                        \
+      atomic_store_relaxed (&(adapt_count),                    \
+         atomic_load_relaxed (&(adapt_count)) - 1);            \
     ret;                                                       \
   })
 
 /* Returns true if lock defined by IS_LOCK_FREE was try-elided.
-   ADAPT_COUNT is a pointer to per-lock state variable.  */
+   ADAPT_COUNT is a per-lock state variable.  */
 
 #define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) ({     \
   int ret = 0;                                         \