From: Willy Tarreau Date: Thu, 18 Jun 2020 06:58:47 +0000 (+0200) Subject: MEDIUM: fd: add experimental support for edge-triggered polling X-Git-Tag: v2.2-dev10~17 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bc52bec163de67e329c0a7941fd26add0eb92d45;p=thirdparty%2Fhaproxy.git MEDIUM: fd: add experimental support for edge-triggered polling Some of the recent optimizations around the polling to save a few epoll_ctl() calls have shown that they could also cause some trouble. However, over time our code base has become totally asynchronous with I/Os always attempted from the upper layers and only retried at the bottom, making it look like we're getting closer to EPOLLET support. There are showstoppers there such as the listeners which cannot support this. But given that most of the epoll_ctl() dance comes from the connections, we can try to enable edge-triggered polling on connections. What this patch does is to add a new global tunable "tune.fd.edge-triggered", that makes fd_insert() automatically set an et_possible bit on the fd if the I/O callback is conn_fd_handler. When the epoll code sees an update for such an FD, it immediately registers it in both directions the first time and doesn't update it anymore. On a few tests it proved quite useful with a 14% request rate increase in a H2->H1 scenario, reducing the epoll_ctl() calls from 2 per request to 2 per connection. The option is obviously disabled by default as bugs are still expected, particularly around the subscribe() code where it is possible that some layers do not always re-attempt reading data after being woken up. 
--- diff --git a/doc/configuration.txt b/doc/configuration.txt index 0b9776cfbd..f8c29b2417 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -675,6 +675,7 @@ The following keywords are supported in the "global" section : - tune.bufsize - tune.chksize - tune.comp.maxlevel + - tune.fd.edge-triggered - tune.h2.header-table-size - tune.h2.initial-window-size - tune.h2.max-concurrent-streams @@ -1874,6 +1875,13 @@ tune.fail-alloc success). This is useful to debug and make sure memory failures are handled gracefully. +tune.fd.edge-triggered { on | off } [ EXPERIMENTAL ] + Enables ('on') or disables ('off') the edge-triggered polling mode for FDs + that support it. This is currently only supported with epoll. It may noticeably + reduce the number of epoll_ctl() calls and slightly improve performance in + certain scenarios. This is still experimental, it may result in frozen + connections if bugs are still present, and is disabled by default. + tune.h2.header-table-size Sets the HTTP/2 dynamic header table size. It defaults to 4096 bytes and cannot be larger than 65536 bytes. A larger value may help certain clients diff --git a/include/haproxy/fd-t.h b/include/haproxy/fd-t.h index 5e17b6fe0e..97b383cebf 100644 --- a/include/haproxy/fd-t.h +++ b/include/haproxy/fd-t.h @@ -133,6 +133,7 @@ struct fdtab { unsigned char linger_risk:1; /* 1 if we must kill lingering before closing */ unsigned char cloned:1; /* 1 if a cloned socket, requires EPOLL_CTL_DEL on close */ unsigned char initialized:1; /* 1 if init phase was done on this fd (e.g. 
set non-blocking) */ + unsigned char et_possible:1; /* 1 if edge-triggered is possible on this FD */ } THREAD_ALIGNED(64); /* polled mask, one bit per thread and per direction for each FD */ diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h index 0f1799d346..f7af4e162d 100644 --- a/include/haproxy/fd.h +++ b/include/haproxy/fd.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -435,6 +436,7 @@ static inline void fd_update_events(int fd, unsigned char evts) static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned long thread_mask) { int locked = fdtab[fd].running_mask != tid_bit; + extern void conn_fd_handler(int); if (locked) fd_set_running_excl(fd); @@ -443,6 +445,12 @@ static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned fdtab[fd].ev = 0; fdtab[fd].linger_risk = 0; fdtab[fd].cloned = 0; + fdtab[fd].et_possible = 0; + + /* conn_fd_handler should support edge-triggered FDs */ + if ((global.tune.options & GTUNE_FD_ET) && fdtab[fd].iocb == conn_fd_handler) + fdtab[fd].et_possible = 1; + fdtab[fd].thread_mask = thread_mask; /* note: do not reset polled_mask here as it indicates which poller * still knows this FD from a possible previous round. diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h index f5bf216299..c7591b4675 100644 --- a/include/haproxy/global-t.h +++ b/include/haproxy/global-t.h @@ -66,6 +66,7 @@ #define GTUNE_STRICT_LIMITS (1<<15) #define GTUNE_INSECURE_FORK (1<<16) #define GTUNE_INSECURE_SETUID (1<<17) +#define GTUNE_FD_ET (1<<18) /* SSL server verify mode */ enum { diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 5102b10760..92c000f859 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -59,6 +59,20 @@ static void _update_fd(int fd) en = fdtab[fd].state; + /* Try to force EPOLLET on FDs that support it */ + if (fdtab[fd].et_possible) { + /* already done ? 
*/ + if (polled_mask[fd].poll_recv & polled_mask[fd].poll_send & tid_bit) + return; + + /* enable ET polling in both directions */ + _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, tid_bit); + _HA_ATOMIC_OR(&polled_mask[fd].poll_send, tid_bit); + opcode = EPOLL_CTL_ADD; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLET; + goto done; + } + /* if we're already polling or are going to poll for this FD and it's * neither active nor ready, force it to be active so that we don't * needlessly unsubscribe then re-subscribe it. @@ -120,6 +134,7 @@ static void _update_fd(int fd) if (en & FD_EV_ACTIVE_W) ev.events |= EPOLLOUT; + done: ev.data.fd = fd; epoll_ctl(epoll_fd[tid], opcode, fd, &ev); } diff --git a/src/fd.c b/src/fd.c index 1e1c0cbc52..60ad69901e 100644 --- a/src/fd.c +++ b/src/fd.c @@ -88,9 +88,11 @@ #endif #include +#include #include #include #include +#include struct fdtab *fdtab = NULL; /* array of all the file descriptors */ @@ -807,6 +809,33 @@ int fork_poller() return 1; } +/* config parser for global "tune.fd.edge-triggered", accepts "on" or "off" */ +static int cfg_parse_tune_fd_edge_triggered(char **args, int section_type, struct proxy *curpx, + struct proxy *defpx, const char *file, int line, + char **err) +{ + if (too_many_args(1, args, err, NULL)) + return -1; + + if (strcmp(args[1], "on") == 0) + global.tune.options |= GTUNE_FD_ET; + else if (strcmp(args[1], "off") == 0) + global.tune.options &= ~GTUNE_FD_ET; + else { + memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]); + return -1; + } + return 0; +} + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.fd.edge-triggered", cfg_parse_tune_fd_edge_triggered }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + REGISTER_PER_THREAD_ALLOC(alloc_pollers_per_thread); REGISTER_PER_THREAD_INIT(init_pollers_per_thread); REGISTER_PER_THREAD_DEINIT(deinit_pollers_per_thread);