- tune.rcvbuf.frontend
- tune.rcvbuf.server
- tune.recv_enough
+ - tune.ring.queues
- tune.runqueue-depth
- tune.sched.low-latency
- tune.sndbuf.backend
may be changed by this setting to better deal with workloads involving lots
of short messages such as telnet or SSH sessions.
+tune.ring.queues <number>
+ Sets the number of write queues in front of ring buffers. This can have an
+ effect on the CPU usage of traces during debugging sessions, and both too
+ low or too large a value can have an important effect. The good value was
+ determined experimentally by developers and there should be no reason to
+ try to change it unless instructed to do so in order to try to address
+ specific issues. Such a setting should not be left in the configuration
+ across version upgrades because its optimal value may evolve over time.
+
tune.runqueue-depth <number>
Sets the maximum amount of task that can be processed at once when running
tasks. The default value depends on the number of threads but sits between 35
# endif
#endif
+/* number of ring wait queues depending on the number
+ * of threads.
+ */
+#ifndef RING_WAIT_QUEUES
+# if defined(USE_THREAD) && MAX_THREADS >= 32
+# define RING_WAIT_QUEUES 16
+# elif defined(USE_THREAD)
+# define RING_WAIT_QUEUES ((MAX_THREADS + 1) / 2)
+# else
+# define RING_WAIT_QUEUES 1
+# endif
+#endif
+
+/* it has been found that 6 queues was optimal on various archs at various
+ * thread counts, so let's use that by default.
+ */
+#ifndef RING_DFLT_QUEUES
+# define RING_DFLT_QUEUES 6
+#endif
+
#endif /* _HAPROXY_DEFAULTS_H */
int nb_stk_ctr; /* number of stick counters, defaults to MAX_SESS_STKCTR */
int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */
uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */
+ uint ring_queues; /* if >0, #ring queues, otherwise equals #thread groups */
#ifdef USE_QUIC
unsigned int quic_backend_max_idle_timeout;
unsigned int quic_frontend_max_idle_timeout;
/* keep the queue in a separate cache line below */
THREAD_PAD(64 - 3*sizeof(void*) - 4*sizeof(int));
- struct ring_wait_cell *queue; // wait queue
-
- /* and leave a spacer after it to avoid false sharing */
- THREAD_PAD(64 - sizeof(void*));
+ struct {
+ struct ring_wait_cell *ptr;
+ THREAD_PAD(64 - sizeof(void*));
+ } queue[RING_WAIT_QUEUES + 1]; // wait queue + 1 spacer
};
#endif /* _HAPROXY_RING_T_H */
uint tid, ltid; /* process-wide and group-wide thread ID (start at 0) */
ulong ltid_bit; /* bit masks for the tid/ltid */
uint tgid; /* ID of the thread group this thread belongs to (starts at 1; 0=unset) */
- /* 32-bit hole here */
+ uint ring_queue; /* queue number for the rings */
ullong pth_id; /* the pthread_t cast to a ullong */
void *stack_top; /* the top of the stack when entering the thread */
#endif
ha_thread_info[tid].stack_top = __builtin_frame_address(0);
+ /* Assign the ring queue. Contrary to an intuitive thought, this does
+ * not benefit from locality and it's counter-productive to group
+ * threads from a same group or range number in the same queue. In some
+ * sense it arranges us because it means we can use a modulo and ensure
+ * that even small numbers of threads are well spread.
+ */
+ ha_thread_info[tid].ring_queue =
+ (tid % MIN(global.nbthread,
+ (global.tune.ring_queues ?
+ global.tune.ring_queues :
+ RING_DFLT_QUEUES))) % RING_WAIT_QUEUES;
+
/* thread is started, from now on it is not idle nor harmless */
thread_harmless_end();
thread_idle_end();
#include <haproxy/api.h>
#include <haproxy/applet.h>
#include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
#include <haproxy/cli.h>
#include <haproxy/ring.h>
#include <haproxy/sc_strm.h>
ring->storage = area;
ring->pending = 0;
ring->waking = 0;
- ring->queue = NULL;
+ memset(&ring->queue, 0, sizeof(ring->queue));
if (reset) {
ring->storage->size = size - sizeof(*ring->storage);
return max;
}
+/* config parser for global "tune.ring.queues", accepts a number from 0 to RING_WAIT_QUEUES */
+static int cfg_parse_tune_ring_queues(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int queues;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ queues = atoi(args[1]);
+ if (queues < 0 || queues > RING_WAIT_QUEUES) {
+ memprintf(err, "'%s' expects a number between 0 and %d but got '%s'.", args[0], RING_WAIT_QUEUES, args[1]);
+ return -1;
+ }
+
+ global.tune.ring_queues = queues;
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.ring.queues", cfg_parse_tune_ring_queues },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
/*
* Local variables:
* c-indent-level: 8