git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
BUG/MEDIUM: peers: don't fail twice to grab the update lock
author Willy Tarreau <w@1wt.eu>
Wed, 3 Sep 2025 09:54:45 +0000 (11:54 +0200)
committer Willy Tarreau <w@1wt.eu>
Wed, 3 Sep 2025 13:51:13 +0000 (15:51 +0200)
When the expire task is running fast (i.e. running almost alone), it's
super hard to grab the update lock, and peers can easily trigger the
watchdog because the time it takes to grab this lock is multiplied by
the number of updates to perform. This is easier to trigger at the end
of an injection session, where the expire task is omnipresent. Let's
just record that we failed once to grab the lock and waited for it; if
we fail a second time in the loop, we no longer insist and instead give
up and come back later.

This should be backported to 3.2, but probably not further given that
this area changed significantly in 3.2.

src/peers.c

index dd6988c623ce41b4b03863a10d50c41e574c4fb3..302cef79ade33186547508f8ac74c348ffce4cca 100644 (file)
@@ -1556,6 +1556,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p,
 {
        int ret, new_pushed, use_timed;
        int updates_sent = 0;
+       int failed_once = 0;
 
        ret = 1;
        use_timed = 0;
@@ -1598,7 +1599,25 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p,
                HA_RWLOCK_RDUNLOCK(STK_TABLE_UPDT_LOCK, &st->table->updt_lock);
 
                ret = peer_send_updatemsg(st, appctx, ts, updateid, new_pushed, use_timed);
-               HA_RWLOCK_RDLOCK(STK_TABLE_UPDT_LOCK, &st->table->updt_lock);
+
+               if (HA_RWLOCK_TRYRDLOCK(STK_TABLE_UPDT_LOCK, &st->table->updt_lock) != 0) {
+                       if (failed_once) {
+                               /* we've already faced contention twice in this
+                                * loop, this is getting serious, do not insist
+                                * anymore and come back later
+                                */
+                               HA_ATOMIC_DEC(&ts->ref_cnt);
+                               applet_have_more_data(appctx);
+                               ret = -1;
+                               goto out_unlocked;
+                       }
+                       /* OK contention happens, for this one we'll wait on the
+                        * lock, but only once.
+                        */
+                       failed_once++;
+                       HA_RWLOCK_RDLOCK(STK_TABLE_UPDT_LOCK, &st->table->updt_lock);
+               }
+
                HA_ATOMIC_DEC(&ts->ref_cnt);
                if (ret <= 0)
                        break;
@@ -1628,6 +1647,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p,
 
  out:
        HA_RWLOCK_RDUNLOCK(STK_TABLE_UPDT_LOCK, &st->table->updt_lock);
+ out_unlocked:
        return ret;
 }