From 3ea2490b4885250d932e537d2cbe0394a228f8f3 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 27 Jul 2018 07:47:24 +0200 Subject: [PATCH] BUG/MEDIUM: threads/sync: use sched_yield when available There is a corner case with the sync point which can significantly degrade performance. The reason is that it forces all threads to busy spin there, and that if there are fewer CPUs available than threads, this busy activity from some threads will force others to wait longer in epoll() or to simply be scheduled out while doing something else, and will increase the time needed to reach the sync point. Given that the sync point is not expected to be stressed *that* much, it is better to call sched_yield() while waiting there to release the CPU and offer it to waiting threads. On a simple test with 4 threads bound to two cores using "maxconn 1" on the server line, the performance was erratic before the recent scheduler changes (between 40 and 200 conn/s with hundreds of ms response time), and it jumped to 7200 with 12ms response time with this fix applied. It should be backported to 1.8 since 1.8 is affected as well. --- src/hathreads.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/hathreads.c b/src/hathreads.c index a3bca7d024..fa9993bd0c 100644 --- a/src/hathreads.c +++ b/src/hathreads.c @@ -106,8 +106,20 @@ static inline void thread_sync_barrier(volatile unsigned long *barrier) HA_ATOMIC_CAS(barrier, &old, 0); HA_ATOMIC_OR(barrier, tid_bit); - while ((*barrier & all_threads_mask) != all_threads_mask) + + /* Note below: we need to wait for all threads to join here, but in + * case several threads are scheduled on the same CPU, busy polling + * will instead degrade the performance, forcing other threads to + * wait longer (typically in epoll_wait()). Let's use sched_yield() + * when available instead. 
+ */ + while ((*barrier & all_threads_mask) != all_threads_mask) { +#if _POSIX_PRIORITY_SCHEDULING + sched_yield(); +#else pl_cpu_relax(); +#endif + } } /* Enter into the sync point and lock it if the current thread has requested a -- 2.47.3