git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: ring: significant boost in the loop by checking the ring queue ptr first
author: Willy Tarreau <w@1wt.eu>
Sun, 17 Mar 2024 09:20:56 +0000 (10:20 +0100)
committer: Willy Tarreau <w@1wt.eu>
Mon, 25 Mar 2024 17:34:19 +0000 (17:34 +0000)
By doing that and placing the cpu_relax at the right places, the ARM
reaches 6.0M/s on 80 threads. On x86_64, at 3C6T the EPYC sees a small
increase from 4.45M to 4.57M but at 24C48T it sees a drop from 3.82M
to 3.33M due to the write contention hidden behind the CAS that
implements the FETCH_OR(), that we'll address next.

src/ring.c

index c445a23cc458ba18ce7a90b7aa133a91f2f8db92..74772314a42ab9221327ae4ee4c1f71215328588 100644 (file)
@@ -272,21 +272,23 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz
                 * we must detect a new leader ASAP so that the fewest possible
                 * threads check the tail.
                 */
-               while ((tail_ofs = HA_ATOMIC_LOAD(tail_ptr)) & RING_TAIL_LOCK) {
-                       next_cell = HA_ATOMIC_LOAD(ring_queue_ptr);
-                       if (next_cell != &cell)
-                               goto wait_for_flush; // another thread arrived, we should go to wait now
-                       __ha_cpu_relax_for_read();
-               }
 
                /* the tail is available again and we're still the leader, try
                 * again.
                 */
-               if (HA_ATOMIC_LOAD(ring_queue_ptr) != &cell)
-                       goto wait_for_flush; // another thread arrived, we should go to wait now
+               while (1) {
+                       next_cell = HA_ATOMIC_LOAD(ring_queue_ptr);
+                       if (next_cell != &cell)
+                               goto wait_for_flush; // FIXME: another thread arrived, we should go to wait now
+                       __ha_cpu_relax_for_read();
+
+                       tail_ofs = HA_ATOMIC_FETCH_OR(tail_ptr, RING_TAIL_LOCK);
+                       if (!(tail_ofs & RING_TAIL_LOCK))
+                               break;
 
+                       __ha_cpu_relax_for_read();
+               }
                /* OK the queue is locked, let's attempt to get the tail lock */
-               tail_ofs = HA_ATOMIC_FETCH_OR(tail_ptr, RING_TAIL_LOCK);
 
                /* did we get it ? */
                if (!(tail_ofs & RING_TAIL_LOCK)) {