MEDIUM: stick-tables: defer adding updates to a tasklet
author    Olivier Houchard <ohouchard@haproxy.com>
          Fri, 2 May 2025 11:46:54 +0000 (11:46 +0000)
committer Willy Tarreau <w@1wt.eu>
          Fri, 2 May 2025 13:27:55 +0000 (15:27 +0200)
There is a lot of contention when trying to add updates to the tree.
So instead of adding updates to the tree right away, just add them to
an mt-list (with one mt-list per thread group, so that the mt-list
itself does not become the new point of contention), and create a
tasklet dedicated to adding the updates to the tree, in batches, to
avoid holding the update lock for too long.

This helps stick tables perform better under heavy load.
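
To make the new flow concrete, below is a minimal, self-contained C sketch of the same deferred-update pattern. It is illustrative only: pthread mutexes stand in for HAProxy's lock-free mt_list and for the updt_lock rwlock, drain_pending() plays the role of the tasklet, and all names (upd_queue, enqueue_update, drain_pending, insert_in_tree) are hypothetical, not HAProxy APIs.

    /* Illustrative sketch only -- not HAProxy code. One pending-update
     * queue per thread group, drained in bounded batches under a single
     * shared "tree" lock.
     */
    #include <pthread.h>
    #include <stddef.h>

    #define NB_TGROUPS 4
    #define MAX_BATCH  100 /* mirrors STKTABLE_MAX_UPDATES_AT_ONCE */

    struct upd_entry {
            struct upd_entry *next;
            int is_local;
    };

    struct upd_queue {
            pthread_mutex_t lock; /* per-group: only contended within the group */
            struct upd_entry *head, *tail;
    };

    static struct upd_queue queues[NB_TGROUPS];
    static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; /* the hot lock */

    /* Producer side: cheap append to the caller's group queue. The
     * shared tree_lock is never taken here, which is the whole point.
     */
    void enqueue_update(int tgid, struct upd_entry *e)
    {
            struct upd_queue *q = &queues[tgid];

            pthread_mutex_lock(&q->lock);
            e->next = NULL;
            if (q->tail)
                    q->tail->next = e;
            else
                    q->head = e;
            q->tail = e;
            pthread_mutex_unlock(&q->lock);
    }

    /* Consumer side: round-robin over the group queues and move at most
     * MAX_BATCH entries into the tree per call, so that tree_lock is
     * never held for long. Returns the number of entries moved; a caller
     * would reschedule itself while queues may still be non-empty.
     */
    int drain_pending(void (*insert_in_tree)(struct upd_entry *))
    {
            int i, grp = 0, empty = 0, done = 0;

            pthread_mutex_lock(&tree_lock);
            for (i = 0; i < MAX_BATCH && empty < NB_TGROUPS; i++) {
                    struct upd_queue *q = &queues[grp];
                    struct upd_entry *e;

                    pthread_mutex_lock(&q->lock);
                    e = q->head;
                    if (e) {
                            q->head = e->next;
                            if (!q->head)
                                    q->tail = NULL;
                    }
                    pthread_mutex_unlock(&q->lock);

                    grp = (grp + 1) % NB_TGROUPS;
                    if (!e) {
                            empty++;
                            continue;
                    }
                    empty = 0;
                    insert_in_tree(e); /* eb32_insert() on the update tree in the real code */
                    done++;
            }
            pthread_mutex_unlock(&tree_lock);
            return done;
    }

The point of the design is visible in the lock usage: producers only ever touch their own group's queue, and the hot tree lock is taken once per batch of up to MAX_BATCH updates instead of once per update.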

include/haproxy/defaults.h
include/haproxy/stick_table-t.h
src/stick_table.c

diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h
index 462793eff29dd6f9ec4492159e117a6b959e8917..c18b2f079330aa730ee7ece60305f9ad28d14542 100644
 #define QUIC_MAX_TX_MEM 0
 #endif
 
+#ifndef STKTABLE_MAX_UPDATES_AT_ONCE
+#define STKTABLE_MAX_UPDATES_AT_ONCE 100
+#endif /* STKTABLE_MAX_UPDATES_AT_ONCE */
+
 #endif /* _HAPROXY_DEFAULTS_H */
diff --git a/include/haproxy/stick_table-t.h b/include/haproxy/stick_table-t.h
index 2340cdf59fc34f8dd7fb6297d880b556f60d50fc..454a35c28c22a9b3712c7a9038ab16c4ef543e48 100644
@@ -151,6 +151,8 @@ struct stksess {
        int seen;                 /* 0 only when no peer has seen this entry yet */
        struct eb32_node exp;     /* ebtree node used to hold the session in expiration tree */
        struct eb32_node upd;     /* ebtree node used to hold the update sequence tree */
+       struct mt_list pend_updts;/* list of entries to be inserted/moved in the update sequence tree */
+       int updt_is_local;        /* is the update a local one? */
        struct ebmb_node key;     /* ebtree node used to hold the session in table */
        /* WARNING! do not put anything after <keys>, it's used by the key */
 };
@@ -220,9 +222,11 @@ struct stktable {
        THREAD_ALIGN(64);
 
        struct eb_root updates;   /* head of sticky updates sequence tree, uses updt_lock */
+       struct mt_list *pend_updts; /* list of updates to be added to the update sequence tree, one per thread-group */
        unsigned int update;      /* uses updt_lock */
        unsigned int localupdate; /* uses updt_lock */
        unsigned int commitupdate;/* used to identify the latest local updates pending for sync, uses updt_lock */
+       struct tasklet *updt_task;/* tasklet responsible for pushing the pending updates into the tree */
 
        THREAD_ALIGN(64);
        /* this lock is heavily used and must be on its own cache line */
diff --git a/src/stick_table.c b/src/stick_table.c
index 8b5192f9d76acd9eba146632cf166e4d860c44da..3be38d82789b0280b7a1529e01241f8c46e3a4e7 100644
@@ -144,12 +144,13 @@ int __stksess_kill(struct stktable *t, struct stksess *ts)
        if (HA_ATOMIC_LOAD(&ts->ref_cnt))
                return 0;
 
-       if (ts->upd.node.leaf_p) {
+       if (ts->upd.node.leaf_p || !MT_LIST_ISEMPTY(&ts->pend_updts)) {
                updt_locked = 1;
                HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
                if (HA_ATOMIC_LOAD(&ts->ref_cnt))
                        goto out_unlock;
        }
+       MT_LIST_DELETE(&ts->pend_updts);
        eb32_delete(&ts->exp);
        eb32_delete(&ts->upd);
        ebmb_delete(&ts->key);
@@ -271,6 +272,7 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts)
        ts->key.node.leaf_p = NULL;
        ts->exp.node.leaf_p = NULL;
        ts->upd.node.leaf_p = NULL;
+       MT_LIST_INIT(&ts->pend_updts);
        ts->expire = tick_add(now_ms, MS_TO_TICKS(t->expire));
        HA_RWLOCK_INIT(&ts->lock);
        return ts;
@@ -362,20 +364,19 @@ int stktable_trash_oldest(struct stktable *t, int to_batch)
                         * with that lock held, will grab a ref_cnt before releasing the
                         * lock. So we must take this lock as well and check the ref_cnt.
                         */
-                       if (ts->upd.node.leaf_p) {
-                               if (!updt_locked) {
-                                       updt_locked = 1;
-                                       HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-                               }
-                               /* now we're locked, new peers can't grab it anymore,
-                                * existing ones already have the ref_cnt.
-                                */
-                               if (HA_ATOMIC_LOAD(&ts->ref_cnt))
-                                       continue;
+                       if (!updt_locked) {
+                               updt_locked = 1;
+                               HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
                        }
+                       /* now we're locked, new peers can't grab it anymore,
+                        * existing ones already have the ref_cnt.
+                        */
+                       if (HA_ATOMIC_LOAD(&ts->ref_cnt))
+                               continue;
 
                        /* session expired, trash it */
                        ebmb_delete(&ts->key);
+                       MT_LIST_DELETE(&ts->pend_updts);
                        eb32_delete(&ts->upd);
                        __stksess_free(t, ts);
                        batched++;
@@ -585,9 +586,7 @@ struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts)
  */
 void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire, int decrefcnt)
 {
-       struct eb32_node * eb;
-       int use_wrlock = 0;
-       int do_wakeup = 0;
+       int did_append = 0;
 
        if (expire != HA_ATOMIC_LOAD(&ts->expire)) {
                /* we'll need to set the expiration and to wake up the expiration timer. */
@@ -602,63 +601,24 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local,
                         * scheduled for at least one peer.
                         */
                        if (!ts->upd.node.leaf_p || _HA_ATOMIC_LOAD(&ts->seen)) {
-                               /* Time to upgrade the read lock to write lock */
-                               HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-                               use_wrlock = 1;
-
-                               /* here we're write-locked */
-
-                               ts->seen = 0;
-                               ts->upd.key = ++t->update;
-                               t->localupdate = t->update;
-                               eb32_delete(&ts->upd);
-                               eb = eb32_insert(&t->updates, &ts->upd);
-                               if (eb != &ts->upd)  {
-                                       eb32_delete(eb);
-                                       eb32_insert(&t->updates, &ts->upd);
-                               }
+                               _HA_ATOMIC_STORE(&ts->updt_is_local, 1);
+                               did_append = MT_LIST_TRY_APPEND(&t->pend_updts[tgid - 1], &ts->pend_updts);
                        }
-                       do_wakeup = 1;
                }
                else {
-                       /* Note: we land here when learning new entries from
-                        * remote peers. We hold one ref_cnt so the entry
-                        * cannot vanish under us, however if two peers create
-                        * the same key at the exact same time, we must be
-                        * careful not to perform two parallel inserts! Hence
-                        * we need to first check leaf_p to know if the entry
-                        * is new, then lock the tree and check the entry again
-                        * (since another thread could have created it in the
-                        * mean time).
-                        */
                        if (!ts->upd.node.leaf_p) {
-                               /* Time to upgrade the read lock to write lock if needed */
-                               HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-                               use_wrlock = 1;
-
-                               /* here we're write-locked */
-                               if (!ts->upd.node.leaf_p) {
-                                       ts->seen = 0;
-                                       ts->upd.key= (++t->update)+(2147483648U);
-                                       eb = eb32_insert(&t->updates, &ts->upd);
-                                       if (eb != &ts->upd) {
-                                               eb32_delete(eb);
-                                               eb32_insert(&t->updates, &ts->upd);
-                                       }
-                               }
+                               _HA_ATOMIC_STORE(&ts->updt_is_local, 0);
+                               did_append = MT_LIST_TRY_APPEND(&t->pend_updts[tgid - 1], &ts->pend_updts);
                        }
                }
 
-               /* drop the lock now */
-               if (use_wrlock)
-                       HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
        }
 
+       if (did_append)
+               tasklet_wakeup(t->updt_task);
+
        if (decrefcnt)
                HA_ATOMIC_DEC(&ts->ref_cnt);
-
-       if (do_wakeup)
-               task_wakeup(t->sync_task, TASK_WOKEN_MSG);
 }
 
 /* Update the expiration timer for <ts> but do not touch its expiration node.
@@ -809,6 +769,60 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *
        return ts;
 }
 
+static struct task *stktable_add_pend_updates(struct task *t, void *ctx, unsigned int state)
+{
+       struct stktable *table = ctx;
+       struct eb32_node *eb;
+       int i, is_local, cur_tgid = tgid - 1, empty_tgid = 0;
+
+       HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &table->updt_lock);
+       for (i = 0; i < STKTABLE_MAX_UPDATES_AT_ONCE; i++) {
+               struct stksess *stksess = MT_LIST_POP(&table->pend_updts[cur_tgid], typeof(stksess), pend_updts);
+
+               if (!stksess) {
+                       empty_tgid++;
+                       cur_tgid++;
+                       if (cur_tgid == global.nbtgroups)
+                               cur_tgid = 0;
+
+                       if (empty_tgid == global.nbtgroups)
+                               break;
+                       continue;
+               }
+               cur_tgid++;
+               empty_tgid = 0;
+               if (cur_tgid == global.nbtgroups)
+                       cur_tgid = 0;
+               is_local = stksess->updt_is_local;
+               stksess->seen = 0;
+               if (is_local) {
+                       stksess->upd.key = ++table->update;
+                       table->localupdate = table->update;
+                       eb32_delete(&stksess->upd);
+               } else {
+                       stksess->upd.key = (++table->update) + (2147483648U);
+               }
+               eb = eb32_insert(&table->updates, &stksess->upd);
+       if (eb != &stksess->upd) {
+                       BUG_ON(1);
+                       eb32_delete(eb);
+                       eb32_insert(&table->updates, &stksess->upd);
+               }
+       }
+
+       HA_RWLOCK_WRUNLOCK(STK_TABLE_UPDT_LOCK, &table->updt_lock);
+
+       /* There's more to do, let's schedule another run */
+       if (empty_tgid < global.nbtgroups)
+               tasklet_wakeup(table->updt_task);
+
+       if (i > 0) {
+               /* We did at least one update, let's wake the sync task */
+               task_wakeup(table->sync_task, TASK_WOKEN_MSG);
+       }
+       return t;
+}
+
 /* Lookup for an entry with the same key and store the submitted
  * stksess if not found. This function locks the table either shared or
  * exclusively, and the refcount of the entry is increased.
@@ -938,20 +952,19 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int
                         * with that lock held, will grab a ref_cnt before releasing the
                         * lock. So we must take this lock as well and check the ref_cnt.
                         */
-                       if (ts->upd.node.leaf_p) {
-                               if (!updt_locked) {
-                                       updt_locked = 1;
-                                       HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
-                               }
-                               /* now we're locked, new peers can't grab it anymore,
-                                * existing ones already have the ref_cnt.
-                                */
-                               if (HA_ATOMIC_LOAD(&ts->ref_cnt))
-                                       continue;
+                       if (!updt_locked) {
+                               updt_locked = 1;
+                               HA_RWLOCK_WRLOCK(STK_TABLE_UPDT_LOCK, &t->updt_lock);
                        }
+                       /* now we're locked, new peers can't grab it anymore,
+                        * existing ones already have the ref_cnt.
+                        */
+                       if (HA_ATOMIC_LOAD(&ts->ref_cnt))
+                               continue;
 
                        /* session expired, trash it */
                        ebmb_delete(&ts->key);
+                       MT_LIST_DELETE(&ts->pend_updts);
                        eb32_delete(&ts->upd);
                        __stksess_free(t, ts);
                }
@@ -988,6 +1001,7 @@ int stktable_init(struct stktable *t, char **err_msg)
 {
        int peers_retval = 0;
        int shard;
+       int i;
 
        t->hash_seed = XXH64(t->id, t->idlen, 0);
 
@@ -1047,6 +1061,16 @@ int stktable_init(struct stktable *t, char **err_msg)
 
                t->write_to.t = table;
        }
+       t->pend_updts = calloc(global.nbtgroups, sizeof(*t->pend_updts));
+       if (!t->pend_updts)
+               goto mem_error;
+       for (i = 0; i < global.nbtgroups; i++)
+               MT_LIST_INIT(&t->pend_updts[i]);
+       t->updt_task = tasklet_new();
+       if (!t->updt_task)
+               goto mem_error;
+       t->updt_task->context = t;
+       t->updt_task->process = stktable_add_pend_updates;
        return 1;
 
  mem_error:
@@ -1065,6 +1089,8 @@ void stktable_deinit(struct stktable *t)
        if (!t)
                return;
        task_destroy(t->exp_task);
+       tasklet_free(t->updt_task);
+       ha_free(&t->pend_updts);
        pool_destroy(t->pool);
 }
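
One detail worth calling out in the enqueue path above: stktable_touch_with_exp() no longer needs the old check-then-lock-then-recheck dance because MT_LIST_TRY_APPEND() only links an element that is not currently on any list, so concurrent or repeated touches of the same entry collapse into a single pending update, and did_append tells the caller whether a tasklet wakeup is needed. Below is a simplified, single-threaded model of that semantic; list_elem, list_init and try_append are illustrative names, and the real mt_list achieves this atomically, without locks.

    /* Simplified model of MT_LIST_TRY_APPEND()'s semantic: a detached
     * element points to itself, and appending fails when the element is
     * already linked. Illustrative only -- HAProxy's mt_list implements
     * this lock-free.
     */
    struct list_elem {
            struct list_elem *next, *prev; /* both self-pointing when detached */
    };

    static void list_init(struct list_elem *e)
    {
            e->next = e->prev = e;
    }

    /* Returns 1 if <e> was appended to <head>, 0 if it was already queued. */
    static int try_append(struct list_elem *head, struct list_elem *e)
    {
            if (e->next != e) /* already on a list: nothing to do */
                    return 0;
            e->prev = head->prev;
            e->next = head;
            head->prev->next = e;
            head->prev = e;
            return 1;
    }

This is also why __stksess_kill() and both expiration paths now call MT_LIST_DELETE(&ts->pend_updts) before freeing an entry: a session may be sitting on a pending queue rather than in the update tree, and both places must be purged.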