From 8de8ed4f48729f07ccdf0671e64483fbc82e0964 Mon Sep 17 00:00:00 2001 From: Olivier Houchard Date: Thu, 30 Jan 2025 11:16:40 +0100 Subject: [PATCH] MEDIUM: connections: Allow taking over connections from other tgroups. Allow haproxy to take over idle connections from other thread groups than our own. To control that, add a new tunable, tune.takeover-other-tg-connections. It can have 3 values, "none", where we won't attempt to get connections from the other thread group (the default), "restricted", where we only will try to get idle connections from other thread groups when we're using reverse HTTP, and "full", where we always try to get connections from other thread groups. Unless there is a special need, it is advised to use "none" (or restricted if we're using reverse HTTP) as using connections from other thread groups may have a performance impact. --- doc/configuration.txt | 12 +++++++++++ include/haproxy/fd.h | 16 ++++++++++++++ include/haproxy/global-t.h | 8 +++++++ src/backend.c | 25 +++++++++++++++++----- src/cfgparse-global.c | 17 +++++++++++++++ src/fd.c | 43 +++++++++++++++++++++++++++++++------- 6 files changed, 108 insertions(+), 13 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index b7d97d0ec..18f5bd465 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -1724,6 +1724,7 @@ The following keywords are supported in the "global" section : - tune.ssl.ssl-ctx-cache-size - tune.ssl.ocsp-update.maxdelay (deprecated) - tune.ssl.ocsp-update.mindelay (deprecated) + - tune.takeover-other-tg-connections - tune.vars.global-max-size - tune.vars.proc-max-size - tune.vars.reqres-max-size @@ -4584,6 +4585,17 @@ tune.stick-counters no "track-sc" rules are used, the value may be lowered (0 being valid to entirely disable stick-counters). +tune.takeover-other-tg-connections + By default, we won't attempt to use idle connections from other thread groups. + This can however be changed. Valid values for are : "none", the + default, if used, no attempt will be made to use idle connections from + other thread groups, "restricted" where we will only attempt to get an idle + connection from another thread if we're using protocols that can't create + new connections, such as reverse http, and "full" where we will always look + in other thread groups for idle connections. + Note that using connections from other thread groups can occur performance + penalties, so it should not be used unless really needed. + tune.vars.global-max-size tune.vars.proc-max-size tune.vars.reqres-max-size diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h index b9bdc8f4f..2912b7288 100644 --- a/include/haproxy/fd.h +++ b/include/haproxy/fd.h @@ -448,6 +448,22 @@ static inline void fd_claim_tgid(int fd, uint desired_tgid) } } +/* + * Update the FD's TGID. + * This should be called with the lock held, and will drop the lock once + * the TGID is updated. + * The reference counter is however preserved. + */ +static inline void fd_update_tgid(int fd, uint desired_tgid) +{ + unsigned int orig_tgid = fdtab[fd].refc_tgid; + unsigned int new_tgid; + /* Remove the lock, and switch to the new tgid */ + do { + new_tgid = (orig_tgid & 0xffff0000) | desired_tgid; + } while (!_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &orig_tgid, new_tgid) && __ha_cpu_relax()); +} + /* atomically read the running mask if the tgid matches, or returns zero if it * does not match. This is meant for use in code paths where the bit is expected * to be present and will be sufficient to protect against a short-term group diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h index 508ef846a..e3a078b28 100644 --- a/include/haproxy/global-t.h +++ b/include/haproxy/global-t.h @@ -109,6 +109,13 @@ enum { SSL_SERVER_VERIFY_REQUIRED = 1, }; +/* Takeover across thread groups */ +enum threadgroup_takeover { + NO_THREADGROUP_TAKEOVER = 0, + RESTRICTED_THREADGROUP_TAKEOVER = 1, + FULL_THREADGROUP_TAKEOVER = 2, +}; + /* bit values to go with "warned" above */ #define WARN_ANY 0x00000001 /* any warning was emitted */ #define WARN_FORCECLOSE_DEPRECATED 0x00000002 @@ -199,6 +206,7 @@ struct global { int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */ uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */ uint ring_queues; /* if >0, #ring queues, otherwise equals #thread groups */ + enum threadgroup_takeover tg_takeover; /* Policy for threadgroup takeover */ #ifdef USE_QUIC unsigned int quic_backend_max_idle_timeout; unsigned int quic_frontend_max_idle_timeout; diff --git a/src/backend.c b/src/backend.c index 56381a49f..6e9537a87 100644 --- a/src/backend.c +++ b/src/backend.c @@ -1263,7 +1263,9 @@ int alloc_bind_address(struct sockaddr_storage **ss, */ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is_safe, int64_t hash) { + const struct tgroup_info *curtg = tg; struct connection *conn = NULL; + unsigned int curtgid = tgid; int i; // thread number int found = 0; int stop; @@ -1318,10 +1320,10 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is * unvisited thread, but always staying in the same group. */ stop = srv->per_tgrp[tgid - 1].next_takeover; - if (stop >= tg->count) - stop %= tg->count; - - stop += tg->base; + if (stop >= curtg->count) + stop %= curtg->count; + stop += curtg->base; +check_tgid: i = stop; do { if (!srv->curr_idle_thr[i] || i == tid) @@ -1356,8 +1358,21 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is } } HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock); - } while (!found && (i = (i + 1 == tg->base + tg->count) ? tg->base : i + 1) != stop); + } while (!found && (i = (i + 1 == curtg->base + curtg->count) ? curtg->base : i + 1) != stop); + if (!found && (global.tune.tg_takeover == FULL_THREADGROUP_TAKEOVER || + (global.tune.tg_takeover == RESTRICTED_THREADGROUP_TAKEOVER && + srv->flags & SRV_F_RHTTP))) { + curtgid = curtgid + 1; + if (curtgid == global.nbtgroups + 1) + curtgid = 1; + /* If we haven't looped yet */ + if (curtgid != tgid) { + curtg = &ha_tgroup_info[curtgid - 1]; + stop = curtg->base; + goto check_tgid; + } + } if (!found) conn = NULL; done: diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c index 4064b2b9c..8e698ba1e 100644 --- a/src/cfgparse-global.c +++ b/src/cfgparse-global.c @@ -1348,6 +1348,22 @@ static int cfg_parse_global_tune_opts(char **args, int section_type, return -1; } } + else if (strcmp(args[0], "tune.takeover-other-tg-connections") == 0) { + if (*(args[1]) == 0) { + memprintf(err, "'%s' expects 'none', 'restricted', or 'full'", args[0]); + return -1; + } + if (strcmp(args[1], "none") == 0) + global.tune.tg_takeover = NO_THREADGROUP_TAKEOVER; + else if (strcmp(args[1], "restricted") == 0) + global.tune.tg_takeover = RESTRICTED_THREADGROUP_TAKEOVER; + else if (strcmp(args[1], "full") == 0) + global.tune.tg_takeover = FULL_THREADGROUP_TAKEOVER; + else { + memprintf(err, "'%s' expects 'none', 'restricted', or 'full', got '%s'", args[0], args[1]); + return -1; + } + } else { BUG_ON(1, "Triggered in cfg_parse_global_tune_opts() by unsupported keyword.\n"); return -1; @@ -1716,6 +1732,7 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.http.maxhdr", cfg_parse_global_tune_opts }, { CFG_GLOBAL, "tune.comp.maxlevel", cfg_parse_global_tune_opts }, { CFG_GLOBAL, "tune.pattern.cache-size", cfg_parse_global_tune_opts }, + { CFG_GLOBAL, "tune.takeover-other-tg-connections", cfg_parse_global_tune_opts }, { CFG_GLOBAL, "tune.disable-fast-forward", cfg_parse_global_tune_forward_opts }, { CFG_GLOBAL, "tune.disable-zero-copy-forwarding", cfg_parse_global_tune_forward_opts }, { CFG_GLOBAL, "tune.chksize", cfg_parse_global_unsupported_opts }, diff --git a/src/fd.c b/src/fd.c index 97d58e797..cd30b34d4 100644 --- a/src/fd.c +++ b/src/fd.c @@ -512,6 +512,8 @@ void fd_migrate_on(int fd, uint new_tid) int fd_takeover(int fd, void *expected_owner) { unsigned long old; + int changing_tgid = 0; + int old_ltid, old_tgid; /* protect ourself against a delete then an insert for the same fd, * if it happens, then the owner will no longer be the expected @@ -520,17 +522,31 @@ int fd_takeover(int fd, void *expected_owner) if (fdtab[fd].owner != expected_owner) return -1; - /* we must be alone to work on this idle FD. If not, it means that its - * poller is currently waking up and is about to use it, likely to - * close it on shut/error, but maybe also to process any unexpectedly - * pending data. It's also possible that the FD was closed and - * reassigned to another thread group, so let's be careful. - */ - if (unlikely(!fd_grab_tgid(fd, ti->tgid))) - return -1; + /* We're taking a connection from a different thread group */ + if ((fdtab[fd].refc_tgid & 0x7fff) != tgid) { + changing_tgid = 1; + + old_tgid = fd_tgid(fd); + BUG_ON(atleast2(fdtab[fd].thread_mask)); + old_ltid = my_ffsl(fdtab[fd].thread_mask) - 1; + + if (unlikely(!fd_lock_tgid_cur(fd))) + return -1; + } else { + /* we must be alone to work on this idle FD. If not, it means that its + * poller is currently waking up and is about to use it, likely to + * close it on shut/error, but maybe also to process any unexpectedly + * pending data. It's also possible that the FD was closed and + * reassigned to another thread group, so let's be careful. + */ + if (unlikely(!fd_grab_tgid(fd, ti->tgid))) + return -1; + } old = 0; if (!HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old, ti->ltid_bit)) { + if (changing_tgid) + fd_unlock_tgid(fd); fd_drop_tgid(fd); return -1; } @@ -538,6 +554,17 @@ int fd_takeover(int fd, void *expected_owner) /* success, from now on it's ours */ HA_ATOMIC_STORE(&fdtab[fd].thread_mask, ti->ltid_bit); + /* + * Change the tgid to our own tgid. + * This removes the lock, we don't need it anymore, but we keep + * the refcount. + */ + if (changing_tgid) { + fd_update_tgid(fd, tgid); + if (cur_poller.fixup_tgid_takeover) + cur_poller.fixup_tgid_takeover(&cur_poller, fd, old_ltid, old_tgid); + } + /* Make sure the FD doesn't have the active bit. It is possible that * the fd is polled by the thread that used to own it, the new thread * is supposed to call subscribe() later, to activate polling. -- 2.39.5