- tune.rcvbuf.server
- tune.recv_enough
- tune.runqueue-depth
+ - tune.sched.low-latency
- tune.sndbuf.client
- tune.sndbuf.server
- tune.ssl.cachesize
tune.runqueue-depth <number>
Sets the maximum number of tasks that can be processed at once when running
tasks. The default value is 200. Increasing it may incur latency when
- dealing with I/Os, making it too small can incur extra overhead.
+ dealing with I/Os, while making it too small can incur extra overhead. When
+ experimenting with much larger values, it may be useful to also enable
+ tune.sched.low-latency to keep the maximum latency as low as possible.
+
+tune.sched.low-latency { on | off }
+ Enables ('on') or disables ('off') the low-latency task scheduler. By default
+ haproxy processes tasks from several classes one class at a time, as this is
+ the most efficient, but when running with large values of tune.runqueue-depth
+ this can have a measurable effect on request or connection latency. When this
+ low-latency setting is enabled, tasks from lower-numbered (more urgent)
+ classes are always executed before the others whenever any exist. This makes
+ it possible to lower the maximum latency experienced by new requests or
+ connections in the middle of massive traffic, at the expense of a higher
+ processing cost for that heavy traffic. For regular usage it is better to
+ leave this off. The default value is off.
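+
+ Example: (illustrative sketch only; the values shown are assumptions for
+ demonstration, not tuned recommendations)
+     global
+         tune.runqueue-depth 2000
+         tune.sched.low-latency on
+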
tune.sndbuf.client <number>
tune.sndbuf.server <number>
the value low, it is possible to reduce the scheduler's run queue depth using
"tune.runqueue-depth", to reduce the number of concurrent events processed at
once using "tune.maxpollevents", to decrease the stream's nice value using
- the "nice" option on the "bind" lines or in the frontend, or to look for
- other heavy requests in logs (those exhibiting large values of "cpu_ns_avg"),
- whose processing needs to be adjusted or fixed. Compression of large buffers
- could be a culprit, like heavy regex or long lists of regex.
- Note: this value is exactly lat_ns_tot divided by cpu_calls.
+ the "nice" option on the "bind" lines or in the frontend, to enable low
+ latency scheduling using "tune.sched.low-latency", or to look for other heavy
+ requests in logs (those exhibiting large values of "cpu_ns_avg"), whose
+ processing needs to be adjusted or fixed. Compression of large buffers could
+ be a culprit, like heavy regex or long lists of regex. Note: this value is
+ exactly lat_ns_tot divided by cpu_calls.
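+
+ For example, both metrics can be sampled in logs to spot such heavy requests
+ (illustrative sketch only; the frontend name and log-format string are
+ assumptions, not a recommended format):
+     frontend fe_main
+         log-format "%ci:%cp [%tr] %ft lat_avg=%[lat_ns_avg] cpu_avg=%[cpu_ns_avg]"
+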
lat_ns_tot : integer
Returns the total number of nanoseconds spent between the moment the task
the value low, it is possible to reduce the scheduler's run queue depth using
"tune.runqueue-depth", to reduce the number of concurrent events processed at
once using "tune.maxpollevents", to decrease the stream's nice value using
- the "nice" option on the "bind" lines or in the frontend, or to look for
- other heavy requests in logs (those exhibiting large values of "cpu_ns_avg"),
- whose processing needs to be adjusted or fixed. Compression of large buffers
- could be a culprit, like heavy regex or long lists of regex. Note: while it
+ the "nice" option on the "bind" lines or in the frontend, to enable low
+ latency scheduling using "tune.sched.low-latency", or to look for other heavy
+ requests in logs (those exhibiting large values of "cpu_ns_avg"), whose
+ processing needs to be adjusted or fixed. Compression of large buffers could
+ be a culprit, like heavy regex or long lists of regex. Note: while it
may intuitively seem that the total latency adds to a transfer time, it is
almost never true because while a task waits for the CPU, network buffers
continue to fill up and the next call will process more at once. The value
#include <import/eb32tree.h>
#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
#include <haproxy/fd.h>
#include <haproxy/freq_ctr.h>
#include <haproxy/list.h>
struct task *(*process)(struct task *t, void *ctx, unsigned short state);
struct list *tl_queues = sched->tasklets;
struct task *t;
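+ /* one bit per tasklet class that still has a run budget; a class's bit
+  * is cleared below once its budget is exhausted.
+  */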
+ uint8_t budget_mask = (1 << TL_CLASSES) - 1;
unsigned int done = 0;
unsigned int queue;
unsigned short state;
for (queue = 0; queue < TL_CLASSES;) {
sched->current_queue = queue;
+ /* global.tune.sched.low-latency is set */
+ if (global.tune.options & GTUNE_SCHED_LOW_LATENCY) {
+ if (unlikely(sched->tl_class_mask & budget_mask & ((1 << queue) - 1))) {
+ /* a lower queue index has tasks again and still has a
+ * budget to run them. Let's switch to it now.
+ */
+ queue = (sched->tl_class_mask & 1) ? 0 :
+ (sched->tl_class_mask & 2) ? 1 : 2;
+ continue;
+ }
+
+ if (unlikely(queue > TL_URGENT &&
+ budget_mask & (1 << TL_URGENT) &&
+ !MT_LIST_ISEMPTY(&sched->shared_tasklet_list))) {
+ /* an urgent tasklet arrived from another thread */
+ break;
+ }
+
+ if (unlikely(queue > TL_NORMAL &&
+ budget_mask & (1 << TL_NORMAL) &&
+ ((sched->rqueue_size > 0) ||
+ (global_tasks_mask & tid_bit)))) {
+ /* a task was woken up by a bulk tasklet or another thread */
+ break;
+ }
+ }
+
if (LIST_ISEMPTY(&tl_queues[queue])) {
sched->tl_class_mask &= ~(1 << queue);
queue++;
continue;
}
if (!budgets[queue]) {
+ budget_mask &= ~(1 << queue);
queue++;
continue;
}
}
}
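+
+/* Note: GTUNE_SCHED_LOW_LATENCY is expected to be defined on a free bit of
+ * global.tune.options (normally in include/haproxy/global-t.h); the exact
+ * bit position below is only an illustrative assumption:
+ *
+ *   #define GTUNE_SCHED_LOW_LATENCY (1<<15)
+ */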
+/* config parser for global "tune.sched.low-latency", accepts "on" or "off" */
+static int cfg_parse_tune_sched_low_latency(char **args, int section_type, struct proxy *curpx,
+ struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_SCHED_LOW_LATENCY;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_SCHED_LOW_LATENCY;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.sched.low-latency", cfg_parse_tune_sched_low_latency },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
INITCALL0(STG_PREPARE, init_task);
/*