MEDIUM: connections: Allow taking over connections from other tgroups.

author Olivier Houchard <ohouchard@haproxy.com>

Thu, 30 Jan 2025 10:16:40 +0000 (11:16 +0100)

committer Olivier Houchard <cognet@ci0.org>

Wed, 26 Feb 2025 12:00:18 +0000 (13:00 +0100)
author Olivier Houchard <ohouchard@haproxy.com>
Thu, 30 Jan 2025 10:16:40 +0000 (11:16 +0100)
committer Olivier Houchard <cognet@ci0.org>
Wed, 26 Feb 2025 12:00:18 +0000 (13:00 +0100)
diff --git a/doc/configuration.txt b/doc/configuration.txt

index b7d97d0ecc0ae50ea1fbe4bb76b8117dac8bc1f6..18f5bd4655540e5d45d75053b86c9b4f69a97246 100644 (file)
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -1724,6 +1724,7 @@ The following keywords are supported in the "global" section :
     - tune.ssl.ssl-ctx-cache-size
     - tune.ssl.ocsp-update.maxdelay (deprecated)
     - tune.ssl.ocsp-update.mindelay (deprecated)
+   - tune.takeover-other-tg-connections
     - tune.vars.global-max-size
     - tune.vars.proc-max-size
     - tune.vars.reqres-max-size
@@ -4584,6 +4585,17 @@ tune.stick-counters <number>
    no "track-sc" rules are used, the value may be lowered (0 being valid to
    entirely disable stick-counters).
  
+tune.takeover-other-tg-connections <value>
+  By default, we won't attempt to use idle connections from other thread groups.
+  This can however be changed. Valid values for <value> are : "none", the
+  default, if used, no attempt will be made to use idle connections from
+  other thread groups, "restricted" where we will only attempt to get an idle
+  connection from another thread if we're using protocols that can't create
+  new connections, such as reverse http, and "full" where we will always look
+  in other thread groups for idle connections.
+  Note that using connections from other thread groups can occur performance
+  penalties, so it should not be used unless really needed.
+
  tune.vars.global-max-size <size>
  tune.vars.proc-max-size <size>
  tune.vars.reqres-max-size <size>
diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h

index b9bdc8f4f74827908ccc36ab6d14039dd4e41fc1..2912b72886003ab4b9cca3c3ef09f9e7c14db680 100644 (file)
--- a/include/haproxy/fd.h
+++ b/include/haproxy/fd.h
@@ -448,6 +448,22 @@ static inline void fd_claim_tgid(int fd, uint desired_tgid)
         }
  }
  
+/*
+ * Update the FD's TGID.
+ * This should be called with the lock held, and will drop the lock once
+ * the TGID is updated.
+ * The reference counter is however preserved.
+ */
+static inline void fd_update_tgid(int fd, uint desired_tgid)
+{
+       unsigned int orig_tgid = fdtab[fd].refc_tgid;
+       unsigned int new_tgid;
+       /* Remove the lock, and switch to the new tgid */
+       do {
+               new_tgid = (orig_tgid & 0xffff0000) | desired_tgid;
+       } while (!_HA_ATOMIC_CAS(&fdtab[fd].refc_tgid, &orig_tgid, new_tgid) && __ha_cpu_relax());
+}
+
  /* atomically read the running mask if the tgid matches, or returns zero if it
   * does not match. This is meant for use in code paths where the bit is expected
   * to be present and will be sufficient to protect against a short-term group
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h

index 508ef846adc354ea165cbaf6234ad2863a93e9cf..e3a078b28ab453816a7e56a1e318e5588dd7f448 100644 (file)
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -109,6 +109,13 @@ enum {
         SSL_SERVER_VERIFY_REQUIRED = 1,
  };
  
+/* Takeover across thread groups */
+enum threadgroup_takeover {
+       NO_THREADGROUP_TAKEOVER = 0,
+       RESTRICTED_THREADGROUP_TAKEOVER = 1,
+       FULL_THREADGROUP_TAKEOVER = 2,
+};
+
  /* bit values to go with "warned" above */
  #define WARN_ANY                    0x00000001 /* any warning was emitted */
  #define WARN_FORCECLOSE_DEPRECATED  0x00000002
@@ -199,6 +206,7 @@ struct global {
                 int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */
                 uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */
                 uint ring_queues;   /* if >0, #ring queues, otherwise equals #thread groups */
+               enum threadgroup_takeover tg_takeover; /* Policy for threadgroup takeover */
  #ifdef USE_QUIC
                 unsigned int quic_backend_max_idle_timeout;
                 unsigned int quic_frontend_max_idle_timeout;
diff --git a/src/backend.c b/src/backend.c

index 56381a49f99120bb4321b63af91290599a360d7c..6e9537a87d362e89c50610f61df9900ad300d048 100644 (file)
--- a/src/backend.c
+++ b/src/backend.c
@@ -1263,7 +1263,9 @@ int alloc_bind_address(struct sockaddr_storage **ss,
   */
  struct connection *conn_backend_get(struct stream *s, struct server *srv, int is_safe, int64_t hash)
  {
+       const struct tgroup_info *curtg = tg;
         struct connection *conn = NULL;
+       unsigned int curtgid = tgid;
         int i; // thread number
         int found = 0;
         int stop;
@@ -1318,10 +1320,10 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is
          * unvisited thread, but always staying in the same group.
          */
         stop = srv->per_tgrp[tgid - 1].next_takeover;
-       if (stop >= tg->count)
-               stop %= tg->count;
-
-       stop += tg->base;
+       if (stop >= curtg->count)
+               stop %= curtg->count;
+       stop += curtg->base;
+check_tgid:
         i = stop;
         do {
                 if (!srv->curr_idle_thr[i] || i == tid)
@@ -1356,8 +1358,21 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is
                         }
                 }
                 HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
-       } while (!found && (i = (i + 1 == tg->base + tg->count) ? tg->base : i + 1) != stop);
+       } while (!found && (i = (i + 1 == curtg->base + curtg->count) ? curtg->base : i + 1) != stop);
  
+       if (!found && (global.tune.tg_takeover == FULL_THREADGROUP_TAKEOVER ||
+           (global.tune.tg_takeover == RESTRICTED_THREADGROUP_TAKEOVER &&
+           srv->flags & SRV_F_RHTTP))) {
+               curtgid = curtgid + 1;
+               if (curtgid == global.nbtgroups + 1)
+                       curtgid = 1;
+               /* If we haven't looped yet */
+               if (curtgid != tgid) {
+                       curtg = &ha_tgroup_info[curtgid - 1];
+                       stop = curtg->base;
+                       goto check_tgid;
+               }
+       }
         if (!found)
                 conn = NULL;
   done:
diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c

index 4064b2b9c5c5cc2ee478e55560a077440683d1b8..8e698ba1efd0ba0e2ea9ce0002ed18b89e45da4f 100644 (file)
--- a/src/cfgparse-global.c
+++ b/src/cfgparse-global.c
@@ -1348,6 +1348,22 @@ static int cfg_parse_global_tune_opts(char **args, int section_type,
                         return -1;
                 }
         }
+       else if (strcmp(args[0], "tune.takeover-other-tg-connections") == 0) {
+               if (*(args[1]) == 0) {
+                       memprintf(err, "'%s' expects 'none', 'restricted', or 'full'", args[0]);
+                       return -1;
+               }
+               if (strcmp(args[1], "none") == 0)
+                       global.tune.tg_takeover = NO_THREADGROUP_TAKEOVER;
+               else if (strcmp(args[1], "restricted") == 0)
+                       global.tune.tg_takeover = RESTRICTED_THREADGROUP_TAKEOVER;
+               else if (strcmp(args[1], "full") == 0)
+                       global.tune.tg_takeover = FULL_THREADGROUP_TAKEOVER;
+               else {
+                       memprintf(err, "'%s' expects 'none', 'restricted', or 'full', got '%s'", args[0], args[1]);
+                       return -1;
+               }
+       }
         else {
                 BUG_ON(1, "Triggered in cfg_parse_global_tune_opts() by unsupported keyword.\n");
                 return -1;
@@ -1716,6 +1732,7 @@ static struct cfg_kw_list cfg_kws = {ILH, {
         { CFG_GLOBAL, "tune.http.maxhdr", cfg_parse_global_tune_opts },
         { CFG_GLOBAL, "tune.comp.maxlevel", cfg_parse_global_tune_opts },
         { CFG_GLOBAL, "tune.pattern.cache-size", cfg_parse_global_tune_opts },
+       { CFG_GLOBAL, "tune.takeover-other-tg-connections", cfg_parse_global_tune_opts },
         { CFG_GLOBAL, "tune.disable-fast-forward", cfg_parse_global_tune_forward_opts },
         { CFG_GLOBAL, "tune.disable-zero-copy-forwarding", cfg_parse_global_tune_forward_opts },
         { CFG_GLOBAL, "tune.chksize", cfg_parse_global_unsupported_opts },
diff --git a/src/fd.c b/src/fd.c

index 97d58e797fdac3c91797d9e6f3d06a7c1d2de6c8..cd30b34d4a8ab3903eea155d480440e9025ae98f 100644 (file)
--- a/src/fd.c
+++ b/src/fd.c
@@ -512,6 +512,8 @@ void fd_migrate_on(int fd, uint new_tid)
  int fd_takeover(int fd, void *expected_owner)
  {
         unsigned long old;
+       int changing_tgid = 0;
+       int old_ltid, old_tgid;
  
         /* protect ourself against a delete then an insert for the same fd,
          * if it happens, then the owner will no longer be the expected
@@ -520,17 +522,31 @@ int fd_takeover(int fd, void *expected_owner)
         if (fdtab[fd].owner != expected_owner)
                 return -1;
  
-       /* we must be alone to work on this idle FD. If not, it means that its
-        * poller is currently waking up and is about to use it, likely to
-        * close it on shut/error, but maybe also to process any unexpectedly
-        * pending data. It's also possible that the FD was closed and
-        * reassigned to another thread group, so let's be careful.
-        */
-       if (unlikely(!fd_grab_tgid(fd, ti->tgid)))
-               return -1;
+       /* We're taking a connection from a different thread group */
+       if ((fdtab[fd].refc_tgid & 0x7fff) != tgid) {
+               changing_tgid = 1;
+
+               old_tgid = fd_tgid(fd);
+               BUG_ON(atleast2(fdtab[fd].thread_mask));
+               old_ltid = my_ffsl(fdtab[fd].thread_mask) - 1;
+
+               if (unlikely(!fd_lock_tgid_cur(fd)))
+                       return -1;
+       } else {
+               /* we must be alone to work on this idle FD. If not, it means that its
+                * poller is currently waking up and is about to use it, likely to
+                * close it on shut/error, but maybe also to process any unexpectedly
+                * pending data. It's also possible that the FD was closed and
+                * reassigned to another thread group, so let's be careful.
+                */
+               if (unlikely(!fd_grab_tgid(fd, ti->tgid)))
+                       return -1;
+       }
  
         old = 0;
         if (!HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old, ti->ltid_bit)) {
+               if (changing_tgid)
+                       fd_unlock_tgid(fd);
                 fd_drop_tgid(fd);
                 return -1;
         }
@@ -538,6 +554,17 @@ int fd_takeover(int fd, void *expected_owner)
         /* success, from now on it's ours */
         HA_ATOMIC_STORE(&fdtab[fd].thread_mask, ti->ltid_bit);
  
+       /*
+        * Change the tgid to our own tgid.
+        * This removes the lock, we don't need it anymore, but we keep
+        * the refcount.
+        */
+       if (changing_tgid) {
+               fd_update_tgid(fd, tgid);
+               if (cur_poller.fixup_tgid_takeover)
+                       cur_poller.fixup_tgid_takeover(&cur_poller, fd, old_ltid, old_tgid);
+       }
+
         /* Make sure the FD doesn't have the active bit. It is possible that
          * the fd is polled by the thread that used to own it, the new thread
          * is supposed to call subscribe() later, to activate polling.
author	Olivier Houchard <ohouchard@haproxy.com>
	Thu, 30 Jan 2025 10:16:40 +0000 (11:16 +0100)
committer	Olivier Houchard <cognet@ci0.org>
	Wed, 26 Feb 2025 12:00:18 +0000 (13:00 +0100)
doc/configuration.txt		patch \| blob \| blame \| history
include/haproxy/fd.h		patch \| blob \| blame \| history
include/haproxy/global-t.h		patch \| blob \| blame \| history
src/backend.c		patch \| blob \| blame \| history
src/cfgparse-global.c		patch \| blob \| blame \| history
src/fd.c		patch \| blob \| blame \| history