]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: listener: make the accept function more robust against pauses
author Willy Tarreau <w@1wt.eu>
Wed, 7 May 2014 17:47:02 +0000 (19:47 +0200)
committer Willy Tarreau <w@1wt.eu>
Wed, 7 May 2014 21:13:08 +0000 (23:13 +0200)
During some tests in multi-process mode under Linux, it appeared that
issuing "disable frontend foo" on the CLI to pause a listener would
make the shutdown(read) of certain processes disturb another process
listening on the same socket, resulting in a 100% CPU loop. What
happens is that accept() returns EAGAIN without accepting anything.
Fortunately, we see that epoll_wait() reports EPOLLIN+EPOLLRDHUP
(likely because the FD points to the same file in the kernel), so we
can use that to stop the other process from trying to accept connections
for a short time and try again later, hoping for the situation to change.
We must not disable the FD, otherwise there would be no way to re-enable it.

Additionally, during these tests, a loop was encountered on EINVAL, which
was not being handled. Now if accept() returns EINVAL, we proceed the same
way (pause the listener and retry later), in case the socket is re-enabled later.

src/listener.c

index 7ab1a87d14812c0bbd34c7e60d173152be648506..4a55e5a3d23879657e0bf0f682a9d7e6ff948440 100644 (file)
@@ -257,6 +257,7 @@ void listener_accept(int fd)
        struct listener *l = fdtab[fd].owner;
        struct proxy *p = l->frontend;
        int max_accept = l->maxaccept ? l->maxaccept : 1;
+       int expire;
        int cfd;
        int ret;
 #ifdef USE_ACCEPT4
@@ -270,14 +271,11 @@ void listener_accept(int fd)
 
        if (!(l->options & LI_O_UNLIMITED) && global.sps_lim) {
                int max = freq_ctr_remain(&global.sess_per_sec, global.sps_lim, 0);
-               int expire;
 
                if (unlikely(!max)) {
                        /* frontend accept rate limit was reached */
-                       limit_listener(l, &global_listener_queue);
                        expire = tick_add(now_ms, next_event_delay(&global.sess_per_sec, global.sps_lim, 0));
-                       task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
-                       return;
+                       goto wait_expire;
                }
 
                if (max_accept > max)
@@ -286,14 +284,11 @@ void listener_accept(int fd)
 
        if (!(l->options & LI_O_UNLIMITED) && global.cps_lim) {
                int max = freq_ctr_remain(&global.conn_per_sec, global.cps_lim, 0);
-               int expire;
 
                if (unlikely(!max)) {
                        /* frontend accept rate limit was reached */
-                       limit_listener(l, &global_listener_queue);
                        expire = tick_add(now_ms, next_event_delay(&global.conn_per_sec, global.cps_lim, 0));
-                       task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
-                       return;
+                       goto wait_expire;
                }
 
                if (max_accept > max)
@@ -302,14 +297,11 @@ void listener_accept(int fd)
 #ifdef USE_OPENSSL
        if (!(l->options & LI_O_UNLIMITED) && global.ssl_lim && l->bind_conf && l->bind_conf->is_ssl) {
                int max = freq_ctr_remain(&global.ssl_per_sec, global.ssl_lim, 0);
-               int expire;
 
                if (unlikely(!max)) {
                        /* frontend accept rate limit was reached */
-                       limit_listener(l, &global_listener_queue);
                        expire = tick_add(now_ms, next_event_delay(&global.ssl_per_sec, global.ssl_lim, 0));
-                       task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
-                       return;
+                       goto wait_expire;
                }
 
                if (max_accept > max)
@@ -365,8 +357,20 @@ void listener_accept(int fd)
                if (unlikely(cfd == -1)) {
                        switch (errno) {
                        case EAGAIN:
+                               if (fdtab[fd].ev & FD_POLL_HUP) {
+                                       /* the listening socket might have been disabled in a shared
+                                        * process and we're a collateral victim. We'll just pause for
+                                        * a while in case it comes back. In the mean time, we need to
+                                        * clear this sticky flag.
+                                        */
+                                       fdtab[fd].ev &= ~FD_POLL_HUP;
+                                       goto transient_error;
+                               }
                                fd_cant_recv(fd);
                                return;   /* nothing more to accept */
+                       case EINVAL:
+                               /* might be trying to accept on a shut fd (eg: soft stop) */
+                               goto transient_error;
                        case EINTR:
                        case ECONNABORTED:
                                continue;
@@ -375,26 +379,20 @@ void listener_accept(int fd)
                                        send_log(p, LOG_EMERG,
                                                 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
                                                 p->id, maxfd);
-                               limit_listener(l, &global_listener_queue);
-                               task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-                               return;
+                               goto transient_error;
                        case EMFILE:
                                if (p)
                                        send_log(p, LOG_EMERG,
                                                 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
                                                 p->id, maxfd);
-                               limit_listener(l, &global_listener_queue);
-                               task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-                               return;
+                               goto transient_error;
                        case ENOBUFS:
                        case ENOMEM:
                                if (p)
                                        send_log(p, LOG_EMERG,
                                                 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
                                                 p->id, maxfd);
-                               limit_listener(l, &global_listener_queue);
-                               task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-                               return;
+                               goto transient_error;
                        default:
                                /* unexpected result, let's give up and let other tasks run */
                                goto stop;
@@ -442,9 +440,7 @@ void listener_accept(int fd)
                        if (ret == 0) /* successful termination */
                                continue;
 
-                       limit_listener(l, &global_listener_queue);
-                       task_schedule(global_listener_queue_task, tick_add(now_ms, 100)); /* try again in 100 ms */
-                       return;
+                       goto transient_error;
                }
 
                if (l->nbconn >= l->maxconn) {
@@ -473,6 +469,15 @@ void listener_accept(int fd)
  stop:
        fd_done_recv(fd);
        return;
+
+ transient_error:
+       /* pause the listener and try again in 100 ms */
+       expire = tick_add(now_ms, 100);
+
+ wait_expire:
+       limit_listener(l, &global_listener_queue);
+       task_schedule(global_listener_queue_task, tick_first(expire, global_listener_queue_task->expire));
+       return;
 }
 
 /*