MEDIUM: dynbuf: implement emergency buffers

author Willy Tarreau <w@1wt.eu>

Fri, 26 Apr 2024 14:40:32 +0000 (16:40 +0200)

committer Willy Tarreau <w@1wt.eu>

Fri, 10 May 2024 15:18:13 +0000 (17:18 +0200)
author Willy Tarreau <w@1wt.eu>
Fri, 26 Apr 2024 14:40:32 +0000 (16:40 +0200)
committer Willy Tarreau <w@1wt.eu>
Fri, 10 May 2024 15:18:13 +0000 (17:18 +0200)
diff --git a/doc/configuration.txt b/doc/configuration.txt

index bb3515bbc3a18c6c0e2de9eb3b42ce22af48d921..6cc616afbb7bc332ce4027d6c89132eaf4c25b4a 100644 (file)
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -3104,26 +3104,24 @@ tune.applet.zero-copy-forwarding { on | off }
  
  tune.buffers.limit <number>
    Sets a hard limit on the number of buffers which may be allocated per process.
-  The default value is zero which means unlimited. The minimum non-zero value
-  will always be greater than "tune.buffers.reserve" and should ideally always
-  be about twice as large. Forcing this value can be particularly useful to
-  limit the amount of memory a process may take, while retaining a sane
-  behavior. When this limit is reached, streams which need a buffer wait for
-  another one to be released by another stream. Since buffers are dynamically
-  allocated and released, the waiting time is very short and not perceptible
-  provided that limits remain reasonable. In fact sometimes reducing the limit
-  may even increase performance by increasing the CPU cache's efficiency. Tests
-  have shown good results on average HTTP traffic with a limit to 1/10 of the
-  expected global maxconn setting, which also significantly reduces memory
-  usage. The memory savings come from the fact that a number of connections
-  will not allocate 2*tune.bufsize. It is best not to touch this value unless
-  advised to do so by an HAProxy core developer.
+  The default value is zero which means unlimited. The limit will automatically
+  be re-adjusted to satisfy the reserved buffers for emergency situations so
+  that the user doesn't have to perform complicated calculations. Forcing this
+  value can be particularly useful to limit the amount of memory a process may
+  take, while retaining a sane behavior. When this limit is reached, a task
+  that requests a buffer waits for another one to be released first. Most of
+  the time the waiting time is very short and not perceptible provided that
+  limits remain reasonable. However, some historical limitations have weakened
+  this mechanism over versions and it is known that in certain situations of
+  sustained shortage, some tasks may freeze until their timeout expires, so it
+  is safer to avoid using this when not strictly necessary.
  
  tune.buffers.reserve <number>
-  Sets the number of buffers which are pre-allocated and reserved for use only
-  during memory shortage conditions resulting in failed memory allocations. The
-  minimum value is 2 and is also the default. There is no reason a user would
-  want to change this value, it's mostly aimed at HAProxy core developers.
+  Sets the number of per-thread buffers which are pre-allocated and reserved
+  for use only during memory shortage conditions resulting in failed memory
+  allocations. The minimum value is 0 and the default is 4. There is no reason
+  a user would want to change this value, unless a core developer suggests to
+  change it for a very specific reason.
  
  tune.bufsize <number>
    Sets the buffer size to this size (in bytes). Lower values allow more
diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h

index 336e550d83015c6483fb6b9aaef07cb2155d7689..eda346aa2830944e924837e5b4dce6a3abc1b4fe 100644 (file)
--- a/include/haproxy/defaults.h
+++ b/include/haproxy/defaults.h
@@ -71,18 +71,9 @@
  #define BUFSIZE                16384
  #endif
  
-/* certain buffers may only be allocated for responses in order to avoid
- * deadlocks caused by request queuing. 2 buffers is the absolute minimum
- * acceptable to ensure that a request gaining access to a server can get
- * a response buffer even if it doesn't completely flush the request buffer.
- * The worst case is an applet making use of a request buffer that cannot
- * completely be sent while the server starts to respond, and all unreserved
- * buffers are allocated by request buffers from pending connections in the
- * queue waiting for this one to flush. Both buffers reserved buffers may
- * thus be used at the same time.
- */
+// number of per-thread emergency buffers for low-memory conditions
  #ifndef RESERVED_BUFS
-#define RESERVED_BUFS   2
+#define RESERVED_BUFS   4
  #endif
  
  // reserved buffer space for header rewriting
diff --git a/include/haproxy/dynbuf.h b/include/haproxy/dynbuf.h

index c4c28538d2b5551775377355c323eaeb483e1e05..31d9dd1052f623e2e94a13c4597279b883bbeab6 100644 (file)
--- a/include/haproxy/dynbuf.h
+++ b/include/haproxy/dynbuf.h
@@ -70,6 +70,20 @@ static inline int b_may_alloc_for_crit(uint crit)
         return 1;
  }
  
+/* Allocates one of the emergency buffers or returns NULL if there are none left */
+static inline char *__b_get_emergency_buf(void)
+{
+       char *ret;
+
+       if (!th_ctx->emergency_bufs_left)
+               return NULL;
+
+       th_ctx->emergency_bufs_left--;
+       ret = th_ctx->emergency_bufs[th_ctx->emergency_bufs_left];
+       th_ctx->emergency_bufs[th_ctx->emergency_bufs_left] = NULL;
+       return ret;
+}
+
  /* Ensures that <buf> is allocated, or allocates it. If no memory is available,
   * ((char *)1) is assigned instead with a zero size. The allocated buffer is
   * returned, or NULL in case no memory is available. Since buffers only contain
@@ -112,7 +126,10 @@ static inline int b_may_alloc_for_crit(uint crit)
                  */                                                                 \
                 *(_buf) = BUF_NULL;                                     \
                 __ha_barrier_store();                                   \
-               pool_free(pool_head_buffer, area);                      \
+               if (th_ctx->emergency_bufs_left < global.tune.reserved_bufs) \
+                       th_ctx->emergency_bufs[th_ctx->emergency_bufs_left++] = area; \
+               else                                                    \
+                       pool_free(pool_head_buffer, area);              \
         } while (0)                                                     \
  
  /* Releases buffer <buf> if allocated, and marks it empty. */
diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h

index 5564a70b5c8fa2951899422e90040fbe71a5b673..636d5b2e51ba97993558d4ce2f63e95abd848afe 100644 (file)
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -149,7 +149,10 @@ struct thread_ctx {
         struct list quic_conns_clo;         /* list of closing quic-conns attached to this thread */
         struct list queued_checks;          /* checks waiting for a connection slot */
         struct list tasklets[TL_CLASSES];   /* tasklets (and/or tasks) to run, by class */
-       // around 48 bytes here for thread-local variables
+
+       void **emergency_bufs;              /* array of buffers allocated at boot. Next free one is [emergency_bufs_left-1] */
+       uint emergency_bufs_left;           /* number of emergency buffers left in magic_bufs[] */
+       // around 36 bytes here for thread-local variables
  
         // third cache line here on 64 bits: accessed mostly using atomic ops
         ALWAYS_ALIGN(64);
diff --git a/src/dynbuf.c b/src/dynbuf.c

index a849aeeb93b92250486e922a911d750fcbdb0fba..aec966701b13fe4dd970829b2d7453f9721f2df9 100644 (file)
--- a/src/dynbuf.c
+++ b/src/dynbuf.c
@@ -163,8 +163,6 @@ static int cfg_parse_tune_buffers_limit(char **args, int section_type, struct pr
         if (global.tune.buf_limit) {
                 if (global.tune.buf_limit < 3)
                         global.tune.buf_limit = 3;
-               if (global.tune.buf_limit <= global.tune.reserved_bufs)
-                       global.tune.buf_limit = global.tune.reserved_bufs + 1;
         }
  
         return 0;
@@ -187,14 +185,44 @@ static int cfg_parse_tune_buffers_reserve(char **args, int section_type, struct
         }
  
         global.tune.reserved_bufs = reserve;
-       if (global.tune.reserved_bufs < 2)
-               global.tune.reserved_bufs = 2;
-       if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs)
-               global.tune.buf_limit = global.tune.reserved_bufs + 1;
-
         return 0;
  }
  
+/* allocate emergency buffers for the thread */
+static int alloc_emergency_buffers_per_thread(void)
+{
+       int idx;
+
+       th_ctx->emergency_bufs_left = global.tune.reserved_bufs;
+       th_ctx->emergency_bufs = calloc(global.tune.reserved_bufs, sizeof(*th_ctx->emergency_bufs));
+       if (!th_ctx->emergency_bufs)
+               return 0;
+
+       for (idx = 0; idx < global.tune.reserved_bufs; idx++) {
+               /* reserved bufs are not subject to the limit, so we must push it */
+               if (_HA_ATOMIC_LOAD(&pool_head_buffer->limit))
+                       _HA_ATOMIC_INC(&pool_head_buffer->limit);
+               th_ctx->emergency_bufs[idx] = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL);
+               if (!th_ctx->emergency_bufs[idx])
+                       return 0;
+       }
+
+       return 1;
+}
+
+/* frees the thread's emergency buffers */
+static void free_emergency_buffers_per_thread(void)
+{
+       int idx;
+
+       if (th_ctx->emergency_bufs) {
+               for (idx = 0; idx < global.tune.reserved_bufs; idx++)
+                       pool_free(pool_head_buffer, th_ctx->emergency_bufs[idx]);
+       }
+
+       ha_free(&th_ctx->emergency_bufs);
+}
+
  /* config keyword parsers */
  static struct cfg_kw_list cfg_kws = {ILH, {
         { CFG_GLOBAL, "tune.buffers.limit", cfg_parse_tune_buffers_limit },
@@ -203,6 +231,8 @@ static struct cfg_kw_list cfg_kws = {ILH, {
  }};
  
  INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+REGISTER_PER_THREAD_ALLOC(alloc_emergency_buffers_per_thread);
+REGISTER_PER_THREAD_FREE(free_emergency_buffers_per_thread);
  
  /*
   * Local variables:
author	Willy Tarreau <w@1wt.eu>
	Fri, 26 Apr 2024 14:40:32 +0000 (16:40 +0200)
committer	Willy Tarreau <w@1wt.eu>
	Fri, 10 May 2024 15:18:13 +0000 (17:18 +0200)
doc/configuration.txt		patch \| blob \| blame \| history
include/haproxy/defaults.h		patch \| blob \| blame \| history
include/haproxy/dynbuf.h		patch \| blob \| blame \| history
include/haproxy/tinfo-t.h		patch \| blob \| blame \| history
src/dynbuf.c		patch \| blob \| blame \| history