git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
IMPORT: plock: give higher precedence to W than S
author Willy Tarreau <w@1wt.eu>
Fri, 7 Feb 2025 15:57:28 +0000 (16:57 +0100)
committer Willy Tarreau <w@1wt.eu>
Fri, 7 Feb 2025 17:04:29 +0000 (18:04 +0100)
It was noticed in haproxy that in certain extreme cases, a write lock
subject to EBO may fail for a very long time in front of a large set
of readers constantly trying to upgrade to the S state. The reason is
that among many readers, one will succeed in its upgrade, and this
situation can last for a very long time with many readers upgrading
in turn, while the writer waits longer and longer before trying again.

Here we're taking a reasonable approach which is that the write lock
should have a higher precedence in its attempt to grab the lock. What
is done is that instead of fully rolling back in case of conflict with
a pure S lock, the writer will only release its read part in order to
let the S upgrade to W if needed, and finish its operations. This
guarantees no other seek/read/write can enter. Once the conflict is
resolved, the writer grabs the read part again and waits for readers
to be gone (in practice it could even return without waiting since we
know that any possible wanderers would leave or even not be there at
all, but it avoids a complicated loop code that wouldn't improve the
practical situation but inflate the code).

Thanks to this change, the maximum write lock latency on a 48-thread
AMD with a heavily loaded scheduler went down from 256 to 64 ms, and the
number of occurrences of 32ms or more was divided by 300, while all
occurrences of 1ms or less were multiplied by up to 3 (3 for the 4-16ns
cases).

This is plock commit b6a28366d156812f59c91346edc2eab6374a5ebd.

include/import/plock.h

index 47ff617a97bd5efff05fa3f3e36471ea4f183234..5c2860561310cfe75c762eef8bc0d3a015ae7e4e 100644 (file)
@@ -549,6 +549,13 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
                        __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                \
                        if (!__builtin_expect(__pl_r & __msk_r, 0))                            \
                                break;                                                         \
+                       if (!__builtin_expect(__pl_r & PLOCK64_WL_ANY, 0)) {                   \
+                               /* S only: let it finish but impose ourselves */               \
+                               pl_sub_noret_lax(__lk_r, PLOCK64_RL_1);                        \
+                               __pl_r = pl_wait_unlock_long(__lk_r, PLOCK64_RL_ANY);          \
+                               __pl_r = pl_ldadd_acq(__lk_r, PLOCK64_RL_1);                   \
+                               break;                                                         \
+                       }                                                                      \
                        pl_sub_noret_lax(__lk_r, __set_r);                                     \
                        __pl_r = pl_wait_unlock_long(__lk_r, __msk_r);                         \
                }                                                                              \
@@ -566,6 +573,13 @@ static unsigned int pl_wait_new_int(const unsigned int *lock, const unsigned int
                        __pl_r = pl_ldadd_acq(__lk_r, __set_r);                                \
                        if (!__builtin_expect(__pl_r & __msk_r, 0))                            \
                                break;                                                         \
+                       if (!__builtin_expect(__pl_r & PLOCK32_WL_ANY, 0)) {                   \
+                               /* S only: let it finish but impose ourselves */               \
+                               pl_sub_noret_lax(__lk_r, PLOCK32_RL_1);                        \
+                               __pl_r = pl_wait_unlock_int(__lk_r, PLOCK32_RL_ANY);          \
+                               __pl_r = pl_ldadd_acq(__lk_r, PLOCK32_RL_1);                   \
+                               break;                                                         \
+                       }                                                                      \
                        pl_sub_noret_lax(__lk_r, __set_r);                                     \
                        __pl_r = pl_wait_unlock_int(__lk_r, __msk_r);                          \
                }                                                                              \