]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: servers: Move to a per-thread idle connection cleanup task
authorOlivier Houchard <ohouchard@haproxy.com>
Thu, 28 May 2026 15:24:08 +0000 (17:24 +0200)
committerOlivier Houchard <cognet@ci0.org>
Mon, 8 Jun 2026 13:38:22 +0000 (15:38 +0200)
Having a single task to take care of idle connection cleanup across all
servers leads to high contention. It uses a lock to maintain its tree of
servers to track, and then can acquire the idle_conns lock for each thread.
Instead, have one task per thread. Each thread will maintain its own
tree, so there will be no need for any lock, and it will just acquire
its own idle_conns lock, so it will lead to less contention.
This is a performance improvement, so backporting is optional, but may be
considered if it is worth it. That would require backporting commit
6f8dab258379dd53e327433ffd890c6d3d6f89ed too.

include/haproxy/server-t.h
include/haproxy/server.h
src/cfgparse.c
src/haproxy.c
src/server.c

index 56d2edef7299e9a91c4140f3f26488022bd08f77..74f206a922f0cae29cac306a96d4bb8a8ce43789 100644 (file)
@@ -280,6 +280,7 @@ struct srv_per_thread {
        struct ceb_root *safe_conns;            /* Safe idle connections */
        struct ceb_root *avail_conns;           /* Connections in use, but with still new streams available */
        struct server *srv;                     /* Back-pointer to the server */
+       struct eb32_node idle_node;             /* When to next do cleanup in the idle connections */
 #ifdef USE_QUIC
        struct ist quic_retry_token;
 #endif
@@ -402,7 +403,6 @@ struct server {
         * thread, and generally at the same time.
         */
        THREAD_ALIGN();
-       struct eb32_node idle_node;             /* When to next do cleanup in the idle connections */
        unsigned int curr_idle_conns;           /* Current number of orphan idling connections, both the idle and the safe lists */
        unsigned int curr_idle_nb;              /* Current number of connections in the idle list */
        unsigned int curr_safe_nb;              /* Current number of connections in the safe list */
index 0d58929683077864173bd3c8c734b64be412a4d2..3f1ebe5ee2be0d8b11594c85bd10d18ffdd058fd 100644 (file)
@@ -41,9 +41,9 @@
 #include <haproxy/tools.h>
 
 
-__decl_thread(extern HA_SPINLOCK_T idle_conn_srv_lock);
 extern struct idle_conns idle_conns[MAX_THREADS];
-extern struct task *idle_conn_task;
+extern struct task *idle_conn_task[MAX_THREADS];
+extern struct eb_root idle_conn_srv[MAX_THREADS];
 extern struct mt_list servers_list;
 extern struct dict server_key_dict;
 
index 2e684e15d73053e6b720ca6f27f84e85948bd08b..5e54500e640752a74af013b90c80d814d55f8a36 100644 (file)
@@ -2486,16 +2486,17 @@ init_proxies_list_stage1:
        /* At this point, target names have already been resolved. */
        /***********************************************************/
 
-       idle_conn_task = task_new_anywhere();
-       if (!idle_conn_task) {
-               ha_alert("parsing : failed to allocate global idle connection task.\n");
-               cfgerr++;
-       }
-       else {
-               idle_conn_task->process = srv_cleanup_idle_conns;
-               idle_conn_task->context = NULL;
+       for (int i = 0; i < global.nbthread; i++) {
+               idle_conn_srv[i] = EB_ROOT;
+               idle_conn_task[i] = task_new_on(i);
+               if (!idle_conn_task[i]) {
+                       ha_alert("parsing : failed to allocate global idle connection task.\n");
+                       cfgerr++;
+               }
+               else {
+                       idle_conn_task[i]->process = srv_cleanup_idle_conns;
+                       idle_conn_task[i]->context = NULL;
 
-               for (i = 0; i < global.nbthread; i++) {
                        idle_conns[i].cleanup_task = task_new_on(i);
                        if (!idle_conns[i].cleanup_task) {
                                ha_alert("parsing : failed to allocate idle connection tasks for thread '%d'.\n", i);
index 6af7b031e867c6ba3707567426319501c7681162..80423720339fdf7b00221fa1d8d1efa1fd4e99a6 100644 (file)
@@ -2810,6 +2810,7 @@ void deinit(void)
        struct cfg_postparser *pprs, *pprsb;
        char **tmp = init_env;
        int cur_fd;
+       int i;
 
        /* the user may want to skip this phase */
        if (global.tune.options & GTUNE_QUICK_EXIT)
@@ -2886,8 +2887,10 @@ void deinit(void)
        ha_free(&global.server_state_base);
        ha_free(&global.server_state_file);
        ha_free(&global.stats_file);
-       task_destroy(idle_conn_task);
-       idle_conn_task = NULL;
+       for (i = 0; i < global.nbthread; i++) {
+               task_destroy(idle_conn_task[i]);
+               idle_conn_task[i] = NULL;
+       }
 
        list_for_each_entry_safe(log, logb, &global.loggers, list) {
                LIST_DEL_INIT(&log->list);
index 330b0842780ec7e3a561d29e384dc07155269e36..957fc4c341e949c1348f7af566c1796c30a53877 100644 (file)
@@ -76,9 +76,8 @@ struct srv_kw_list srv_keywords = {
        .list = LIST_HEAD_INIT(srv_keywords.list)
 };
 
-__decl_thread(HA_SPINLOCK_T idle_conn_srv_lock);
-struct eb_root idle_conn_srv = EB_ROOT;
-struct task *idle_conn_task __read_mostly = NULL;
+struct eb_root idle_conn_srv[MAX_THREADS];
+struct task *idle_conn_task[MAX_THREADS] __read_mostly = {};
 struct mt_list servers_list = MT_LIST_HEAD_INIT(servers_list);
 static struct task *server_atomic_sync_task = NULL;
 static event_hdl_async_equeue server_atomic_sync_queue;
@@ -3281,6 +3280,8 @@ struct server *srv_drop(struct server *srv)
 
        /* This BUG_ON() is invalid for now as server released on deinit will
         * trigger it as they are not properly removed from their tree.
+        * This is even more relevant now, as we would need to check the
+        * idle_node for each thread
         */
        //BUG_ON(ceb_intree(&srv->addr_node) ||
        //       srv->idle_node.node.leaf_p ||
@@ -6652,7 +6653,8 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap
        cebuis_item_delete(&be->used_server_addr, addr_node, addr_key, srv);
 
        /* remove srv from idle_node tree for idle conn cleanup */
-       eb32_delete(&srv->idle_node);
+       for (ret = 0; ret < global.nbthread; ret++)
+               eb32_delete(&srv->per_thr[ret].idle_node);
 
        /* set LSB bit (odd bit) for reuse_cnt */
        srv_id_reuse_cnt |= 1;
@@ -7545,20 +7547,16 @@ int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_saf
                HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
                _HA_ATOMIC_INC(&srv->curr_idle_thr[tid]);
 
-               if (HA_ATOMIC_LOAD(&srv->idle_node.node.leaf_p) == NULL) {
-                       HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
-                       if (_HA_ATOMIC_LOAD(&srv->idle_node.node.leaf_p) == NULL) {
-                               srv->idle_node.key = tick_add(srv->pool_purge_delay,
+               if (srv->per_thr[tid].idle_node.node.leaf_p == NULL) {
+                       srv->per_thr[tid].idle_node.key = tick_add(srv->pool_purge_delay,
                                                              now_ms);
-                               eb32_insert(&idle_conn_srv, &srv->idle_node);
-                               if (!task_in_wq(idle_conn_task) && !
-                                   task_in_rq(idle_conn_task)) {
-                                       task_schedule(idle_conn_task,
-                                                     srv->idle_node.key);
-                               }
-                               BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list));
+                       eb32_insert(&idle_conn_srv[tid], &srv->per_thr[tid].idle_node);
+                       if (!task_in_wq(idle_conn_task[tid]) &&
+                           !task_in_rq(idle_conn_task[tid])) {
+                               task_schedule(idle_conn_task[tid],
+                                             srv->per_thr[tid].idle_node.key);
                        }
-                       HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
+                       BUG_ON_STRESS(!mt_list_isempty(&conn->toremove_list));
                }
                return 1;
        }
@@ -7580,24 +7578,26 @@ struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned i
 {
        struct server *srv;
        struct eb32_node *eb;
-       int i;
        unsigned int next_wakeup;
+       int mytid = tid;
 
        next_wakeup = TICK_ETERNITY;
-       HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
        while (1) {
+               struct srv_per_thread *per_thr;
                int exceed_conns;
                int to_kill;
                int curr_idle;
+               int max_conn;
+               int removed;
 
-               eb = eb32_lookup_ge(&idle_conn_srv, now_ms - TIMER_LOOK_BACK);
+               eb = eb32_lookup_ge(&idle_conn_srv[mytid], now_ms - TIMER_LOOK_BACK);
                if (!eb) {
                        /* we might have reached the end of the tree, typically because
                         * <now_ms> is in the first half and we're first scanning the last
                        * half. Let's loop back to the beginning of the tree now.
                        */
 
-                       eb = eb32_first(&idle_conn_srv);
+                       eb = eb32_first(&idle_conn_srv[mytid]);
                        if (likely(!eb))
                                break;
                }
@@ -7606,7 +7606,8 @@ struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned i
                        next_wakeup = eb->key;
                        break;
                }
-               srv = eb32_entry(eb, struct server, idle_node);
+               per_thr = eb32_entry(eb, struct srv_per_thread, idle_node);
+               srv = per_thr->srv;
 
                /* Calculate how many idle connections we want to kill :
                 * we want to remove half the difference between the total
@@ -7619,47 +7620,41 @@ struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned i
                exceed_conns = srv->curr_used_conns + curr_idle - MAX(srv->max_used_conns, srv->est_need_conns);
                exceed_conns = to_kill = exceed_conns / 2 + (exceed_conns & 1);
 
-               srv->est_need_conns = (srv->est_need_conns + srv->max_used_conns) / 2;
+               /*
+                * It is acceptable not to lock anything before modifying
+                * est_need_conns and max_used_conns, even if multiple threads
+                * are running that task at the same time, we don't need a
+                * very high precision here, it will converge over time.
+                */
+               HA_ATOMIC_STORE(&srv->est_need_conns, (srv->est_need_conns + srv->max_used_conns) / 2);
                if (srv->est_need_conns < srv->max_used_conns)
-                       srv->est_need_conns = srv->max_used_conns;
+                       HA_ATOMIC_STORE(&srv->est_need_conns, srv->max_used_conns);
 
                HA_ATOMIC_STORE(&srv->max_used_conns, srv->curr_used_conns);
 
                if (exceed_conns <= 0)
                        goto remove;
 
-               /* check all threads starting with ours */
-               for (i = tid;;) {
-                       int max_conn;
-                       int removed;
-
-                       max_conn = (exceed_conns * srv->curr_idle_thr[i]) /
-                                  curr_idle + 1;
+               max_conn = (exceed_conns * srv->curr_idle_thr[mytid]) / curr_idle + 1;
 
-                       HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
-                       removed = srv_migrate_conns_to_remove(srv, i, max_conn);
-                       HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+               HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[mytid].idle_conns_lock);
+               removed = srv_migrate_conns_to_remove(srv, mytid, max_conn);
+               HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[mytid].idle_conns_lock);
 
-                       if (removed)
-                               task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
-
-                       if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid)
-                               break;
-               }
+               if (removed)
+                       task_wakeup(idle_conns[mytid].cleanup_task, TASK_WOKEN_OTHER);
 remove:
-               eb32_delete(&srv->idle_node);
+               eb32_delete(&srv->per_thr[mytid].idle_node);
 
-               if (srv->curr_idle_conns) {
+               if (!LIST_ISEMPTY(&srv->per_thr[mytid].idle_conn_list)) {
                        /* There are still more idle connections, add the
                         * server back in the tree.
                         */
-                       srv->idle_node.key = tick_add(srv->pool_purge_delay, now_ms);
-                       eb32_insert(&idle_conn_srv, &srv->idle_node);
-                       next_wakeup = tick_first(next_wakeup, srv->idle_node.key);
+                       srv->per_thr[mytid].idle_node.key = tick_add(srv->pool_purge_delay, now_ms);
+                       eb32_insert(&idle_conn_srv[mytid], &srv->per_thr[mytid].idle_node);
+                       next_wakeup = tick_first(next_wakeup, srv->per_thr[mytid].idle_node.key);
                }
        }
-       HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
-
        task->expire = next_wakeup;
        return task;
 }