MEDIUM: listener: add the "shards" bind keyword

author Willy Tarreau <w@1wt.eu>

Tue, 12 Oct 2021 13:23:03 +0000 (15:23 +0200)

committer Willy Tarreau <w@1wt.eu>

Thu, 14 Oct 2021 19:27:48 +0000 (21:27 +0200)
author Willy Tarreau <w@1wt.eu>
Tue, 12 Oct 2021 13:23:03 +0000 (15:23 +0200)
committer Willy Tarreau <w@1wt.eu>
Thu, 14 Oct 2021 19:27:48 +0000 (21:27 +0200)
diff --git a/doc/configuration.txt b/doc/configuration.txt

index 982d9b374515a72caef6e03a7be3a261b862fae3..e58d6d3e2ca388c249c5755d5b8ee7a32db0a321 100644 (file)
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -14079,6 +14079,24 @@ proto <name>
    instance, it is possible to force the http/2 on clear TCP by specifying "proto
    h2" on the bind line.
  
+shards <number> | by-thread
+  In multi-threaded mode, on operating systems supporting multiple listeners on
+  the same IP:port, this will automatically create this number of multiple
+  identical listeners for the same line, all bound to a fair share of the number
+  of the threads attached to this listener. This can sometimes be useful when
+  using very large thread counts where the in-kernel locking on a single socket
+  starts to cause a significant overhead. In this case the incoming traffic is
+  distributed over multiple sockets and the contention is reduced. Note that
+  doing this can easily increase the CPU usage by making more threads work a
+  little bit.
+
+  If the number of shards is higher than the number of available threads, it
+  will automatically be trimmed to the number of threads (i.e. one shard per
+  thread). The special "by-thread" value also creates as many shards as there
+  are threads on the "bind" line. Since the system will evenly distribute the
+  incoming traffic between all these shards, it is important that this number
+  is an integral divisor of the number of threads.
+
  ssl
    This setting is only available when support for OpenSSL was built in. It
    enables SSL deciphering on connections instantiated from this listener. A
diff --git a/include/haproxy/receiver-t.h b/include/haproxy/receiver-t.h

index 481ac644347f68af42c884c3a45b191270e006b9..3e1ff81dbf82426a882d36d41e8d996ea3a1fe80 100644 (file)
--- a/include/haproxy/receiver-t.h
+++ b/include/haproxy/receiver-t.h
@@ -49,6 +49,7 @@ struct rx_settings {
         char *interface;                  /* interface name or NULL */
         const struct netns_entry *netns;  /* network namespace of the listener*/
         unsigned int options;             /* receiver options (RX_O_*) */
+       uint shards;                      /* number of shards */
  };
  
  /* This describes a receiver with all its characteristics (address, options, etc) */
diff --git a/src/cfgparse.c b/src/cfgparse.c

index ada344d17970004d8e64ce8353de00d0f545811d..173dcdb3254a7ef5347613ea36aaf9b662558560 100644 (file)
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -2566,8 +2566,54 @@ int check_config_validity()
  
                         /* apply thread masks and groups to all receivers */
                         list_for_each_entry(li, &bind_conf->listeners, by_bind) {
-                               li->rx.bind_thread = bind_conf->bind_thread;
-                               li->rx.bind_tgroup = bind_conf->bind_tgroup;
+                               if (bind_conf->settings.shards <= 1) {
+                                       li->rx.bind_thread = bind_conf->bind_thread;
+                                       li->rx.bind_tgroup = bind_conf->bind_tgroup;
+                               } else {
+                                       struct listener *new_li;
+                                       int shard, shards, todo, done, bit;
+                                       ulong mask;
+
+                                       shards = bind_conf->settings.shards;
+                                       todo = my_popcountl(bind_conf->bind_thread);
+
+                                       /* no more shards than total threads */
+                                       if (shards > todo)
+                                               shards = todo;
+
+                                       shard = done = bit = 0;
+                                       new_li = li;
+
+                                       while (1) {
+                                               mask = 0;
+                                               while (done < todo) {
+                                                       /* enlarge mask to cover next bit of bind_thread */
+                                                       while (!(bind_conf->bind_thread & (1UL << bit)))
+                                                               bit++;
+                                                       mask |= (1UL << bit);
+                                                       bit++;
+                                                       done += shards;
+                                               }
+
+                                               new_li->rx.bind_thread = bind_conf->bind_thread & mask;
+                                               new_li->rx.bind_tgroup = bind_conf->bind_tgroup;
+                                               done -= todo;
+
+                                               shard++;
+                                               if (shard >= shards)
+                                                       break;
+
+                                               /* create another listener for new shards */
+                                               new_li = clone_listener(li);
+                                               if (!new_li) {
+                                                       ha_alert("Out of memory while trying to allocate extra listener for shard %d in %s %s\n",
+                                                                shard, proxy_type_str(curproxy), curproxy->id);
+                                                       cfgerr++;
+                                                       err_code |= ERR_FATAL | ERR_ALERT;
+                                                       goto out;
+                                               }
+                                       }
+                               }
                         }
                 }
  
diff --git a/src/listener.c b/src/listener.c

index 044d22390ff37d0903dd5378216f32f003d5af92..f16ba2d0ac17428bddd14e304110c2e95e08f71c 100644 (file)
--- a/src/listener.c
+++ b/src/listener.c
@@ -1360,6 +1360,7 @@ struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file,
         bind_conf->settings.ux.uid = -1;
         bind_conf->settings.ux.gid = -1;
         bind_conf->settings.ux.mode = 0;
+       bind_conf->settings.shards = 1;
         bind_conf->xprt = xprt;
         bind_conf->frontend = fe;
         bind_conf->severity_output = CLI_SEVERITY_NONE;
@@ -1639,6 +1640,30 @@ static int bind_parse_proto(char **args, int cur_arg, struct proxy *px, struct b
         return 0;
  }
  
+/* parse the "shards" bind keyword. Takes an integer or "by-thread" */
+static int bind_parse_shards(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+       int val;
+
+       if (!*args[cur_arg + 1]) {
+               memprintf(err, "'%s' : missing value", args[cur_arg]);
+               return ERR_ALERT | ERR_FATAL;
+       }
+
+       if (strcmp(args[cur_arg + 1], "by-thread") == 0) {
+               val = MAX_THREADS; /* will be trimmed later anyway */
+       } else {
+               val = atol(args[cur_arg + 1]);
+               if (val < 1 || val > MAX_THREADS) {
+                       memprintf(err, "'%s' : invalid value %d, allowed range is %d..%d or 'by-thread'", args[cur_arg], val, 1, MAX_THREADS);
+                       return ERR_ALERT | ERR_FATAL;
+               }
+       }
+
+       conf->settings.shards = val;
+       return 0;
+}
+
  /* parse the "thread" bind keyword */
  static int bind_parse_thread(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
  {
@@ -1734,6 +1759,7 @@ static struct bind_kw_list bind_kws = { "ALL", { }, {
         { "nice",         bind_parse_nice,         1 }, /* set nice of listening socket */
         { "process",      bind_parse_process,      1 }, /* set list of allowed process for this socket */
         { "proto",        bind_parse_proto,        1 }, /* set the proto to use for all incoming connections */
+       { "shards",       bind_parse_shards,       1 }, /* set number of shards */
         { "thread",       bind_parse_thread,       1 }, /* set list of allowed threads for this socket */
         { /* END */ },
  }};
author	Willy Tarreau <w@1wt.eu>
	Tue, 12 Oct 2021 13:23:03 +0000 (15:23 +0200)
committer	Willy Tarreau <w@1wt.eu>
	Thu, 14 Oct 2021 19:27:48 +0000 (21:27 +0200)
doc/configuration.txt		patch \| blob \| blame \| history
include/haproxy/receiver-t.h		patch \| blob \| blame \| history
src/cfgparse.c		patch \| blob \| blame \| history
src/listener.c		patch \| blob \| blame \| history