From 16fbdda3c33703702462a46805a88673e7f6ca51 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Fri, 18 Jun 2021 09:45:27 +0200
Subject: [PATCH] MEDIUM: queue: use a dedicated lock for the queues (v2)

Till now whenever a server or proxy's queue was touched, this server
or proxy's lock was taken. Not only this requires distinct code paths,
but it also causes unnecessary contention with other uses of these locks.

This patch adds a lock inside the "queue" structure that will be used
the same way by the server and the proxy queuing code. The server used
to use a spinlock and the proxy an rwlock, though the queue only used
it for locked writes. This new version uses a spinlock since we don't
need the read lock part here. Tests have not shown any benefit nor cost
in using this one versus the rwlock so we could change later if needed.

The lower contention on the locks increases the performance from 362k
to 374k req/s on 16 threads with 20 servers and leastconn. The gain
with roundrobin even increases by 9%.

This is tagged medium because the lock is changed, but no other part of
the code touches the queues, with nor without locking, so this should
remain invisible.
---
 include/haproxy/queue-t.h |  1 +
 include/haproxy/thread.h  |  2 ++
 src/proxy.c               |  1 +
 src/queue.c               | 49 ++++++++++++++++++++-------------------
 src/server.c              |  1 +
 5 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/include/haproxy/queue-t.h b/include/haproxy/queue-t.h
index 4478d6c556..cbc9d7183e 100644
--- a/include/haproxy/queue-t.h
+++ b/include/haproxy/queue-t.h
@@ -41,6 +41,7 @@ struct pendconn {
 
 struct queue {
 	struct eb_root head;                    /* queued pendconnds */
+	__decl_thread(HA_SPINLOCK_T lock);      /* for manipulations in the tree */
 	unsigned int idx;			/* current queuing index */
 	unsigned int length;                    /* number of entries */
 };
diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h
index b64e2bb24f..0ca773cf60 100644
--- a/include/haproxy/thread.h
+++ b/include/haproxy/thread.h
@@ -399,6 +399,7 @@ enum lock_label {
 	LOGSRV_LOCK,
 	DICT_LOCK,
 	PROTO_LOCK,
+	QUEUE_LOCK,
 	CKCH_LOCK,
 	SNI_LOCK,
 	SSL_SERVER_LOCK,
@@ -451,6 +452,7 @@ static inline const char *lock_label(enum lock_label label)
 	case LOGSRV_LOCK:          return "LOGSRV";
 	case DICT_LOCK:            return "DICT";
 	case PROTO_LOCK:           return "PROTO";
+	case QUEUE_LOCK:           return "QUEUE";
 	case CKCH_LOCK:            return "CKCH";
 	case SNI_LOCK:             return "SNI";
 	case SSL_SERVER_LOCK:      return "SSL_SERVER";
diff --git a/src/proxy.c b/src/proxy.c
index f6d0442f16..98a46fd5b3 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -1293,6 +1293,7 @@ void init_new_proxy(struct proxy *p)
 	memset(p, 0, sizeof(struct proxy));
 	p->obj_type = OBJ_TYPE_PROXY;
 	p->queue.head = EB_ROOT;
+	HA_SPIN_INIT(&p->queue.lock);
 	LIST_INIT(&p->acl);
 	LIST_INIT(&p->http_req_rules);
 	LIST_INIT(&p->http_res_rules);
diff --git a/src/queue.c b/src/queue.c
index 5886dbf08c..05bd8f2c3c 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -157,9 +157,9 @@ static void __pendconn_unlink_prx(struct pendconn *p)
 static inline void pendconn_queue_lock(struct pendconn *p)
 {
 	if (p->srv)
-		HA_SPIN_LOCK(SERVER_LOCK, &p->srv->lock);
+		HA_SPIN_LOCK(QUEUE_LOCK, &p->srv->queue.lock);
 	else
-		HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->px->lock);
+		HA_SPIN_LOCK(QUEUE_LOCK, &p->px->queue.lock);
 }
 
 /* Unlocks the queue the pendconn element belongs to. This relies on both p->px
@@ -169,9 +169,9 @@ static inline void pendconn_queue_lock(struct pendconn *p)
 static inline void pendconn_queue_unlock(struct pendconn *p)
 {
 	if (p->srv)
-		HA_SPIN_UNLOCK(SERVER_LOCK, &p->srv->lock);
+		HA_SPIN_UNLOCK(QUEUE_LOCK, &p->srv->queue.lock);
 	else
-		HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->px->lock);
+		HA_SPIN_UNLOCK(QUEUE_LOCK, &p->px->queue.lock);
 }
 
 /* Removes the pendconn from the server/proxy queue. At this stage, the
@@ -187,12 +187,12 @@ void pendconn_unlink(struct pendconn *p)
 
 	if (p->srv) {
 		/* queued in the server */
-		HA_SPIN_LOCK(SERVER_LOCK, &p->srv->lock);
+		HA_SPIN_LOCK(QUEUE_LOCK, &p->srv->queue.lock);
 		if (p->node.node.leaf_p) {
 			__pendconn_unlink_srv(p);
 			done = 1;
 		}
-		HA_SPIN_UNLOCK(SERVER_LOCK, &p->srv->lock);
+		HA_SPIN_UNLOCK(QUEUE_LOCK, &p->srv->queue.lock);
 		if (done) {
 			_HA_ATOMIC_DEC(&p->srv->queue.length);
 			_HA_ATOMIC_DEC(&p->px->totpend);
@@ -200,12 +200,12 @@ void pendconn_unlink(struct pendconn *p)
 	}
 	else {
 		/* queued in the proxy */
-		HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->px->lock);
+		HA_SPIN_LOCK(QUEUE_LOCK, &p->px->queue.lock);
 		if (p->node.node.leaf_p) {
 			__pendconn_unlink_prx(p);
 			done = 1;
 		}
-		HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->px->lock);
+		HA_SPIN_UNLOCK(QUEUE_LOCK, &p->px->queue.lock);
 		if (done) {
 			_HA_ATOMIC_DEC(&p->px->queue.length);
 			_HA_ATOMIC_DEC(&p->px->totpend);
@@ -339,9 +339,8 @@ void process_srv_queue(struct server *s, int server_locked)
 	int maxconn;
 	int done = 0;
 
-	if (!server_locked)
-		HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
-	HA_RWLOCK_WRLOCK(PROXY_LOCK,  &p->lock);
+	HA_SPIN_LOCK(SERVER_LOCK, &s->queue.lock);
+	HA_SPIN_LOCK(PROXY_LOCK,  &p->queue.lock);
 	maxconn = srv_dynamic_maxconn(s);
 	while (s->served < maxconn) {
 		int ret = pendconn_process_next_strm(s, p);
@@ -350,9 +349,8 @@ void process_srv_queue(struct server *s, int server_locked)
 		_HA_ATOMIC_INC(&s->served);
 		done++;
 	}
-	HA_RWLOCK_WRUNLOCK(PROXY_LOCK,  &p->lock);
-	if (!server_locked)
-		HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+	HA_SPIN_UNLOCK(PROXY_LOCK,  &p->queue.lock);
+	HA_SPIN_UNLOCK(SERVER_LOCK, &s->queue.lock);
 
 	if (done) {
 		_HA_ATOMIC_SUB(&p->totpend, done);
@@ -417,10 +415,10 @@ struct pendconn *pendconn_add(struct stream *strm)
 		}
 		__ha_barrier_atomic_store();
 
-		HA_SPIN_LOCK(SERVER_LOCK, &p->srv->lock);
+		HA_SPIN_LOCK(QUEUE_LOCK, &p->srv->queue.lock);
 		p->queue_idx = srv->queue.idx - 1; // for increment
 		eb32_insert(&srv->queue.head, &p->node);
-		HA_SPIN_UNLOCK(SERVER_LOCK, &p->srv->lock);
+		HA_SPIN_UNLOCK(QUEUE_LOCK, &p->srv->queue.lock);
 	}
 	else {
 		unsigned int old_max, new_max;
@@ -433,10 +431,10 @@ struct pendconn *pendconn_add(struct stream *strm)
 		}
 		__ha_barrier_atomic_store();
 
-		HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->px->lock);
+		HA_SPIN_LOCK(QUEUE_LOCK, &p->px->queue.lock);
 		p->queue_idx = px->queue.idx - 1; // for increment
 		eb32_insert(&px->queue.head, &p->node);
-		HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->px->lock);
+		HA_SPIN_UNLOCK(QUEUE_LOCK, &p->px->queue.lock);
 	}
 
 	_HA_ATOMIC_INC(&px->totpend);
@@ -444,8 +442,8 @@ struct pendconn *pendconn_add(struct stream *strm)
 }
 
 /* Redistribute pending connections when a server goes down. The number of
- * connections redistributed is returned. It must be called with the server
- * lock held.
+ * connections redistributed is returned. It will take the server queue lock
+ * and does not use nor depend on other locks.
  */
 int pendconn_redistribute(struct server *s)
 {
@@ -458,6 +456,7 @@ int pendconn_redistribute(struct server *s)
 	if ((s->proxy->options & (PR_O_REDISP|PR_O_PERSIST)) != PR_O_REDISP)
 		return 0;
 
+	HA_SPIN_LOCK(SERVER_LOCK, &s->queue.lock);
 	for (node = eb32_first(&s->queue.head); node; node = nodeb) {
 		nodeb =	eb32_next(node);
 
@@ -472,6 +471,8 @@ int pendconn_redistribute(struct server *s)
 		task_wakeup(p->strm->task, TASK_WOKEN_RES);
 		xferred++;
 	}
+	HA_SPIN_UNLOCK(SERVER_LOCK, &s->queue.lock);
+
 	if (xferred) {
 		_HA_ATOMIC_SUB(&s->queue.length, xferred);
 		_HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
@@ -482,8 +483,8 @@ int pendconn_redistribute(struct server *s)
 /* Check for pending connections at the backend, and assign some of them to
  * the server coming up. The server's weight is checked before being assigned
  * connections it may not be able to handle. The total number of transferred
- * connections is returned. It must be called with the server lock held, and
- * will take the proxy's lock.
+ * connections is returned. It will take the proxy's queue lock and will not
+ * use nor depend on other locks.
  */
 int pendconn_grab_from_px(struct server *s)
 {
@@ -502,7 +503,7 @@ int pendconn_grab_from_px(struct server *s)
 	     ((s != s->proxy->lbprm.fbck) && !(s->proxy->options & PR_O_USE_ALL_BK))))
 		return 0;
 
-	HA_RWLOCK_WRLOCK(PROXY_LOCK, &s->proxy->lock);
+	HA_SPIN_LOCK(QUEUE_LOCK, &s->proxy->queue.lock);
 	maxconn = srv_dynamic_maxconn(s);
 	while ((p = pendconn_first(&s->proxy->queue.head))) {
 		if (s->maxconn && s->served + xferred >= maxconn)
@@ -514,7 +515,7 @@ int pendconn_grab_from_px(struct server *s)
 		task_wakeup(p->strm->task, TASK_WOKEN_RES);
 		xferred++;
 	}
-	HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &s->proxy->lock);
+	HA_SPIN_UNLOCK(QUEUE_LOCK, &s->proxy->queue.lock);
 	if (xferred) {
 		_HA_ATOMIC_SUB(&s->proxy->queue.length, xferred);
 		_HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
diff --git a/src/server.c b/src/server.c
index 5d869e6fba..0a128a2f1e 100644
--- a/src/server.c
+++ b/src/server.c
@@ -2164,6 +2164,7 @@ struct server *new_server(struct proxy *proxy)
 	srv->obj_type = OBJ_TYPE_SERVER;
 	srv->proxy = proxy;
 	srv->queue.head = EB_ROOT;
+	HA_SPIN_INIT(&srv->queue.lock);
 	LIST_APPEND(&servers_list, &srv->global_list);
 	LIST_INIT(&srv->srv_rec_item);
 	LIST_INIT(&srv->ip_rec_item);
-- 
2.47.3