]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: stick-tables: give up on lock contention in process_table_expire()
author: Willy Tarreau <w@1wt.eu>
Tue, 9 Sep 2025 13:03:52 +0000 (15:03 +0200)
committer: Willy Tarreau <w@1wt.eu>
Tue, 9 Sep 2025 15:56:37 +0000 (17:56 +0200)
process_table_expire() can take quite a lot of time running over all
shards. During this time it will hinder track-sc rules and peers, which
will experience an increased latency to do their work, especially peers
where each message will cause a lock, whose cumulated time can exceed
the watchdog's patience.

Here, we proceed just like in stktable_trash_oldest(): we use a
trylock to detect contention. The first time it happens,
if we hadn't purged anything, we switch to a regular lock to perform
the operation, and next time it happens we abort. This guarantees that
some entries will be expired and that contention will be reduced
when detected.

With this change, various tests didn't manage to produce any warning,
including at the end of the load generation session.

This should be backported to 3.2 after a bit more testing.

src/stick_table.c

index a7129b2fd2118dae996486c6152da3c5f529afa0..90fc7b0e67fb8fa457fc4a1569e72746e1b5dd75 100644 (file)
@@ -926,6 +926,8 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
        int exp_next;
        int task_exp;
        int shard, init_shard;
+       int failed_once = 0;
+       int purged = 0;
 
        task_exp = TICK_ETERNITY;
 
@@ -934,7 +936,18 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
        do {
                updt_locked = 0;
                looped = 0;
-               HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+
+               if (HA_RWLOCK_TRYWRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock) != 0) {
+                       if (purged || failed_once) {
+                               /* already purged or second failed lock, yield and come back later */
+                               to_visit = 0;
+                               break;
+                       }
+                       /* make sure we succeed at least once */
+                       failed_once = 1;
+                       HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock);
+               }
+
                eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK);
 
                while (to_visit >= 0) {
@@ -1016,6 +1029,7 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
                        MT_LIST_DELETE(&ts->pend_updts);
                        eb32_delete(&ts->upd);
                        __stksess_free(t, ts);
+                       purged++;
                }
 
                /* We have found no task to expire in any tree */