From: Willy Tarreau <w@1wt.eu>
Date: Thu, 24 Jun 2021 06:04:24 +0000 (+0200)
Subject: MEDIUM: queue: take the proxy lock only during the px queue accesses
X-Git-Tag: v2.5-dev1~25
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=49667c14ba15e0798be145635c80c809ffeaa44b;p=thirdparty%2Fhaproxy.git

MEDIUM: queue: take the proxy lock only during the px queue accesses

There's no point keeping the proxy lock held for a long time: it's
only needed when checking the proxy's queue, and keeping it held
prevents multiple servers from dequeuing in parallel. Let's move it
into pendconn_process_next_strm() and release it as soon as possible.
The pendconn remains under the server queue lock's protection,
guaranteeing that no stream will release it while it's being touched.

For roundrobin, the performance increases by 76% (327k to 575k req/s)
on 16 threads. Even with a single server and maxconn=100, the
performance increases from 398 to 496 kreq/s. For leastconn, almost no
change is visible (less than one percent), but this is expected since
in that case most of the time is spent in fwlc_reposition() and
fwlc_get_next_server().
---

diff --git a/src/queue.c b/src/queue.c
index 1aa3fb9ac7..7b88422ec9 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -271,8 +271,15 @@ static int pendconn_process_next_strm(struct server *srv, struct proxy *px, int
 		p = pendconn_first(&srv->queue.head);
 
 	pp = NULL;
-	if (px_ok && px->queue.length)
+	if (px_ok && px->queue.length) {
+		/* the lock only remains held as long as the pp is
+		 * in the proxy's queue.
+		 */
+		HA_SPIN_LOCK(PROXY_LOCK,  &px->queue.lock);
 		pp = pendconn_first(&px->queue.head);
+		if (!pp)
+			HA_SPIN_UNLOCK(PROXY_LOCK,  &px->queue.lock);
+	}
 
 	if (!p && !pp)
 		return 0;
@@ -304,11 +311,14 @@ static int pendconn_process_next_strm(struct server *srv, struct proxy *px, int
  use_pp:
 	/* Let's switch from the server pendconn to the proxy pendconn */
 	__pendconn_unlink_prx(pp);
+	HA_SPIN_UNLOCK(PROXY_LOCK,  &px->queue.lock);
 	_HA_ATOMIC_DEC(&px->queue.length);
 	_HA_ATOMIC_INC(&px->queue.idx);
 	p = pp;
 	goto unlinked;
  use_p:
+	if (pp)
+		HA_SPIN_UNLOCK(PROXY_LOCK,  &px->queue.lock);
 	__pendconn_unlink_srv(p);
 	_HA_ATOMIC_DEC(&srv->queue.length);
 	_HA_ATOMIC_INC(&srv->queue.idx);
@@ -343,7 +353,6 @@ void process_srv_queue(struct server *s)
 	          (s == p->lbprm.fbck || (p->options & PR_O_USE_ALL_BK))));
 
 	HA_SPIN_LOCK(SERVER_LOCK, &s->queue.lock);
-	HA_SPIN_LOCK(PROXY_LOCK,  &p->queue.lock);
 	maxconn = srv_dynamic_maxconn(s);
 	while (s->served < maxconn) {
 		int ret = pendconn_process_next_strm(s, p, px_ok);
@@ -352,7 +361,6 @@ void process_srv_queue(struct server *s)
 		_HA_ATOMIC_INC(&s->served);
 		done++;
 	}
-	HA_SPIN_UNLOCK(PROXY_LOCK,  &p->queue.lock);
 	HA_SPIN_UNLOCK(SERVER_LOCK, &s->queue.lock);
 
 	if (done) {