	 */
	next_conn = 0;
+#if defined(USE_THREAD)
+	count = l->bind_conf->thr_count;
+	if (count > 1) {
+		struct accept_queue_ring *ring;
+		int r, t1, t2, q1, q2;
+
+		/* Pick two distinct thread slots from a single random draw,
+		 * shifting first to discard the weaker low-order bits of
+		 * random().
+		 */
+		r = (random() >> 8) % ((count - 1) * count);
+		t2 = r / count;   // 0..count-2
+		t1 = r % count;   // 0..count-1
+		t2 += t1 + 1;     // necessarily different from t1
+
+		if (t2 >= count)
+			t2 -= count;
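+		/* t2 is now uniform over the count-1 slots other than t1:
+		 * one draw over count*(count-1) combinations yields an
+		 * unbiased pair without needing a retry loop.
+		 */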
+
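+		/* map both relative slots to the actual thread IDs bound
+		 * to this bind_conf
+		 */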
+		t1 = bind_map_thread_id(l->bind_conf, t1);
+		t2 = bind_map_thread_id(l->bind_conf, t2);
+
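+		/* compute each ring's occupancy as (tail - head) modulo
+		 * ACCEPT_QUEUE_SIZE, kept in 0..ACCEPT_QUEUE_SIZE-1 by the
+		 * wrap checks below
+		 */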
+		q1 = accept_queue_rings[t1].tail - accept_queue_rings[t1].head + ACCEPT_QUEUE_SIZE;
+		if (q1 >= ACCEPT_QUEUE_SIZE)
+			q1 -= ACCEPT_QUEUE_SIZE;
+
+		q2 = accept_queue_rings[t2].tail - accept_queue_rings[t2].head + ACCEPT_QUEUE_SIZE;
+		if (q2 >= ACCEPT_QUEUE_SIZE)
+			q2 -= ACCEPT_QUEUE_SIZE;
+
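+		/* this is the classic "power of two choices" strategy:
+		 * sampling two queues and keeping the shorter one spreads
+		 * the load nearly as evenly as scanning all of them
+		 */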
+		/* make t1 the least loaded of the two, comparing each thread's
+		 * pending ring entries plus its active connection count
+		 */
+		if (q1 >= ACCEPT_QUEUE_SIZE || l->thr_conn[t1] + q1 > l->thr_conn[t2] + q2)
+			t1 = t2;
+
+		/* We use deferred accepts even if it's the local thread because
+		 * tests show that it's the best performing model, likely due to
+		 * better cache locality when processing this loop.
+		 */
+		ring = &accept_queue_rings[t1];
+		if (accept_queue_push_mp(ring, cfd, l, &addr, laddr)) {
+			task_wakeup(ring->task, TASK_WOKEN_IO);
+			continue;
+		}
+		/* If the ring is full, we fall through to a synchronous
+		 * accept on the local thread below.
+		 * FIXME: we should update some stats here.
+		 */
+	}
+#endif // USE_THREAD
+
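+	/* default path: account for and accept the connection on the
+	 * local thread, either because multi-queue did not apply or
+	 * because the target ring was full
+	 */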
	HA_ATOMIC_ADD(&l->thr_conn[tid], 1);
	ret = l->accept(l, cfd, &addr);
	if (unlikely(ret <= 0)) {