From 5b4aa65a489cdda0e29f27a4ca0e4f75fe2901cd Mon Sep 17 00:00:00 2001 From: Paul Murphy Date: Tue, 12 Apr 2016 16:58:54 -0500 Subject: [PATCH] powerpc: Optimize lock elision for pthread_mutex_t With TLE enabled, the adapt count variable update incurs an 8% overhead before entering the critical section of an elided mutex. Instead, if it is done right after leaving the critical section, this serialization can be avoided. This alters the existing behavior of __lll_trylock_elision as it will only decrement the adapt_count if it successfully acquires the lock. * sysdeps/unix/sysv/linux/powerpc/elision-lock.c (__lll_lock_elision): Remove adapt_count decrement... * sysdeps/unix/sysv/linux/powerpc/elision-trylock.c (__lll_trylock_elision): Likewise. * sysdeps/unix/sysv/linux/powerpc/elision-unlock.c (__lll_unlock_elision): ... to here. And utilize new adapt_count parameter. * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h (__lll_unlock_elision): Update to include adapt_count parameter. (lll_unlock_elision): Pass pointer to adapt_count variable. (cherry picked from commit fadd2ad9cc36115440d50b0eae9299e65988917d) --- ChangeLog | 15 +++++++++++++++ sysdeps/unix/sysv/linux/powerpc/elision-lock.c | 1 - sysdeps/unix/sysv/linux/powerpc/elision-trylock.c | 1 - sysdeps/unix/sysv/linux/powerpc/elision-unlock.c | 12 ++++++++++-- sysdeps/unix/sysv/linux/powerpc/lowlevellock.h | 4 ++-- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index c9b99741ae7..93a5ed6da52 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2016-04-12 Paul E. Murphy + + * sysdeps/unix/sysv/linux/powerpc/elision-lock.c + (__lll_lock_elision): Remove adapt_count decrement... + * sysdeps/unix/sysv/linux/powerpc/elision-trylock.c + (__lll_trylock_elision): Likewise. + * sysdeps/unix/sysv/linux/powerpc/elision-unlock.c + (__lll_unlock_elision): ... to here. And utilize + new adapt_count parameter. + * sysdeps/unix/sysv/linux/powerpc/lowlevellock.h + (__lll_unlock_elision): Update to include adapt_count + parameter. + (lll_unlock_elision): Pass pointer to adapt_count + variable. + 2016-04-12 Paul E. Murphy * nptl/pthread_mutex_unlock.c (lll_unlock_elision): diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c index 82399de49a8..2a0e5407dd2 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c @@ -47,7 +47,6 @@ __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared) { if (*adapt_count > 0) { - (*adapt_count)--; goto use_lock; } diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c index 5995e77ad6c..b391116b64c 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c @@ -36,7 +36,6 @@ __lll_trylock_elision (int *futex, short *adapt_count) /* Only try a transaction if it's worth it. */ if (*adapt_count > 0) { - (*adapt_count)--; goto use_lock; } diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c index 7234db6e2d3..4b4ae62d9fd 100644 --- a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c +++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c @@ -21,12 +21,20 @@ #include "htm.h" int -__lll_unlock_elision(int *lock, int pshared) +__lll_unlock_elision (int *lock, short *adapt_count, int pshared) { /* When the lock was free we're in a transaction. */ if (*lock == 0) __libc_tend (0); else - lll_unlock ((*lock), pshared); + { + lll_unlock ((*lock), pshared); + + /* Update the adapt count AFTER completing the critical section. + Doing this here prevents unneeded stalling when entering + a critical section. Saving about 8% runtime on P8. */ + if (*adapt_count > 0) + (*adapt_count)--; + } return 0; } diff --git a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h index 16479e77530..6769c253cee 100644 --- a/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h +++ b/sysdeps/unix/sysv/linux/powerpc/lowlevellock.h @@ -32,7 +32,7 @@ extern int __lll_timedlock_elision extern int __lll_lock_elision (int *futex, short *adapt_count, int private) attribute_hidden; -extern int __lll_unlock_elision(int *lock, int private) +extern int __lll_unlock_elision (int *lock, short *adapt_count, int private) attribute_hidden; extern int __lll_trylock_elision(int *lock, short *adapt_count) @@ -41,7 +41,7 @@ extern int __lll_trylock_elision(int *lock, short *adapt_count) #define lll_lock_elision(futex, adapt_count, private) \ __lll_lock_elision (&(futex), &(adapt_count), private) #define lll_unlock_elision(futex, adapt_count, private) \ - __lll_unlock_elision (&(futex), private) + __lll_unlock_elision (&(futex), &(adapt_count), private) #define lll_trylock_elision(futex, adapt_count) \ __lll_trylock_elision (&(futex), &(adapt_count)) -- 2.47.2