]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
[MEDIUM] implement "http-check disable-on-404" for graceful shutdown
authorWilly Tarreau <w@1wt.eu>
Fri, 30 Nov 2007 09:41:39 +0000 (10:41 +0100)
committerWilly Tarreau <w@1wt.eu>
Fri, 30 Nov 2007 09:41:39 +0000 (10:41 +0100)
When an HTTP server returns "404 not found", it indicates that at least
part of it is still running. For this reason, it can be convenient for
application administrators to be able to consider code 404 as valid,
but for a server which does not want to participate in load balancing
anymore. This is useful to seamlessly exclude a server from a farm
without acting on the load balancer. For instance, let's consider that
haproxy checks for the "/alive" file. To enable load balancing on a
server, the admin would simply do :

  # touch /var/www/alive

And to disable the server, he would simply do :

  # rm /var/www/alive

Another immediate gain from doing this is that it is now possible to
send NOTICE messages instead of ALERT messages when a server is first
disabled, then goes down. This provides a graceful shutdown method.

To enable this behaviour, specify "http-check disable-on-404" in the
backend.

include/types/proxy.h
include/types/server.h
src/backend.c
src/cfgparse.c
src/checks.c

index 2a75fad9e6669b32e2caef24b38f52580dc1aa68..fd48e707ff7842287324c1abd5aa3b72b11730eb 100644 (file)
 #define PR_O_TCPSPLICE 0x08000000      /* delegate data transfer to linux kernel's tcp_splice */
 #define PR_O_CONTSTATS 0x10000000      /* continous counters */
 #define PR_O_HTTP_PROXY 0x20000000     /* Enable session to use HTTP proxy operations */
+#define PR_O_DISABLE404 0x40000000      /* Disable a server on a 404 response to a health-check */
 
 /* This structure is used to apply fast weighted round robin on a server group */
 struct fwrr_group {
index 47c152c3d678ea41a2ba6353b52712d8ef8d79c5..b50e1936d8796e63cde028b8020eb3db24c32959 100644 (file)
@@ -41,6 +41,7 @@
 #define SRV_MAPPORTS   0x0004  /* this server uses mapped ports */
 #define SRV_BIND_SRC   0x0008  /* this server uses a specific source address */
 #define SRV_CHECKED    0x0010  /* this server needs to be checked */
+#define SRV_GOINGDOWN  0x0020  /* this server says that it's going down (404).
+                                * NOTE(review): this is the same value as
+                                * SRV_TPROXY_ADDR (0x0020) defined just below.
+                                * If both flags are stored in the same state
+                                * word, a test on SRV_GOINGDOWN would also
+                                * match SRV_TPROXY_ADDR - TODO confirm and
+                                * renumber one of them if so.
+                                */
 
 #define SRV_TPROXY_ADDR        0x0020  /* bind to this non-local address to reach this server */
 #define SRV_TPROXY_CIP 0x0040  /* bind to the client's IP address to reach this server */
index a8676b3faace569884328f382f82f8e21af4da1d..cc12f7ad62a43029f5bdb2b20586c9726decf0ef 100644 (file)
@@ -60,6 +60,8 @@ static inline int srv_is_usable(int state, int weight)
 {
        if (!weight)
                return 0;
+       if (state & SRV_GOINGDOWN)
+               return 0;
        if (!(state & SRV_RUNNING))
                return 0;
        return 1;
@@ -193,7 +195,8 @@ void recalc_server_map(struct proxy *px)
                int max = 0;
                best = NULL;
                for (cur = px->srv; cur; cur = cur->next) {
-                       if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) {
+                       if (flag == (cur->state &
+                                    (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) {
                                int v;
 
                                /* If we are forced to return only one server, we don't want to
index affb9569a829d38bc03d94b830d10aff241ae038..f283332b4e00e34e77e9abb3b5c49abf2f9b7514 100644 (file)
@@ -1292,6 +1292,19 @@ int cfg_parse_listen(const char *file, int linenum, char **args)
                /* enable reconnections to dispatch */
                curproxy->options |= PR_O_REDISP;
        }
+       else if (!strcmp(args[0], "http-check")) {
+               if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+                       return 0;
+
+               if (strcmp(args[1], "disable-on-404") == 0) {
+                       /* enable a graceful server shutdown on an HTTP 404 response */
+                       curproxy->options |= PR_O_DISABLE404;
+               }
+               else {
+                       Alert("parsing [%s:%d] : '%s' only supports 'disable-on-404'.\n", file, linenum, args[0]);
+                       return -1;
+               }
+       }
 #ifdef TPROXY
        else if (!strcmp(args[0], "transparent")) {
                /* enable transparent proxy connections */
@@ -2525,6 +2538,11 @@ int readcfgfile(const char *file)
                                Warning("parsing %s : Layer 7 hash not possible for %s '%s'. Falling back to round robin.\n",
                                        file, proxy_type_str(curproxy), curproxy->id);
                        }
+                       if (curproxy->options & PR_O_DISABLE404) {
+                               curproxy->options &= ~PR_O_DISABLE404;
+                               Warning("parsing %s : '%s' will be ignored for %s '%s' (requires HTTP mode).\n",
+                                       file, "disable-on-404", proxy_type_str(curproxy), curproxy->id);
+                       }
                }
 
                if (curproxy->mode == PR_MODE_HEALTH) { /* TCP PROXY or HEALTH CHECK */
@@ -2540,6 +2558,11 @@ int readcfgfile(const char *file)
                                      file, curproxy->id);
                                cfgerr++;
                        }
+                       if ((curproxy->options & PR_O_DISABLE404) && !(curproxy->options & PR_O_HTTP_CHK)) {
+                               curproxy->options &= ~PR_O_DISABLE404;
+                               Warning("parsing %s : '%s' will be ignored for %s '%s' (requires 'option httpchk').\n",
+                                       file, "disable-on-404", proxy_type_str(curproxy), curproxy->id);
+                       }
                }
 
                /* if a default backend was specified, let's find it */
index e02b8914ffbe1774a405b10dd360638aeed7856a..d3aa6c8ce1be30d56b95367de2cce0b8285fe06e 100644 (file)
 #include <import/ip_tproxy.h>
 #endif
 
+/* sends a log message when a backend goes down, and also sets last
+ * change date.
+ *
+ * <be> is the proxy being reported as having no server left. Its
+ * last_change field is set to the current second (now.tv_sec) and its
+ * down_trans counter is incremented before the message is emitted, both
+ * through Alert() and through send_log() at level LOG_EMERG. The function
+ * does not look at the server counters itself: deciding that no server is
+ * left is up to the caller. Nothing is returned.
+ */
+static void set_backend_down(struct proxy *be)
+{
+       be->last_change = now.tv_sec;
+       be->down_trans++;
+
+       Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
+       send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
+}
+
+/* Redistribute pending connections when a server goes down. The number of
+ * connections redistributed is returned.
+ *
+ * The list <s>->pendconns is walked. Only the entries whose session's
+ * backend has PR_O_REDISP set are processed: the session loses its
+ * SN_DIRECT, SN_ASSIGNED and SN_ADDR_SET flags as well as its server
+ * pointer, its cookie flags are flushed, the pending connection is passed to
+ * pendconn_free() and the session's task is woken up. The other entries are
+ * not touched by this function and are not counted in the return value.
+ */
+static int redistribute_pending(struct server *s)
+{
+       struct pendconn *pc, *pc_bck, *pc_end;
+       int xferred = 0; /* number of pending connections processed below */
+
+       FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
+               struct session *sess = pc->sess; /* read before <pc> is given to pendconn_free() */
+               if (sess->be->options & PR_O_REDISP) {
+                       /* The REDISP option was specified. We will ignore
+                        * cookie and force to balance or use the dispatcher.
+                        */
+                       sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
+                       sess->srv = NULL; /* it's left to the dispatcher to choose a server */
+                       http_flush_cookie_flags(&sess->txn);
+                       pendconn_free(pc);
+                       task_wakeup(sess->task); /* uses the saved <sess>, not <pc> */
+                       xferred++;
+               }
+       }
+       return xferred;
+}
+
+/* Check for pending connections at the backend, and assign some of them to
+ * the server coming up. The server's weight is checked before being assigned
+ * connections it may not be able to handle. The total number of transferred
+ * connections is returned.
+ *
+ * If <s>->eweight is zero, nothing is done and 0 is returned. Otherwise
+ * pending connections are taken one at a time with
+ * pendconn_from_px(<s>->proxy) until it returns NULL or, when <s>->maxconn
+ * is non-zero, until the number already transferred reaches
+ * srv_dynamic_maxconn(<s>). Each session taken gets <s> as its server, its
+ * pending connection is passed to pendconn_free() and its task is woken up.
+ */
+static int check_for_pending(struct server *s)
+{
+       int xferred;
+
+       if (!s->eweight)
+               return 0;
+
+       for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) { /* no upper bound when maxconn is 0 */
+               struct session *sess;
+               struct pendconn *p;
+
+               p = pendconn_from_px(s->proxy);
+               if (!p)
+                       break; /* nothing returned by pendconn_from_px(), stop here */
+               p->sess->srv = s;
+               sess = p->sess; /* keep the session pointer: <p> is handed to pendconn_free() next */
+               pendconn_free(p);
+               task_wakeup(sess->task);
+       }
+       return xferred;
+}
 
 /* Sets server <s> down, notifies by all available means, recounts the
  * remaining servers on the proxy and transfers queued sessions whenever
  */
 static void set_server_down(struct server *s)
 {
-       struct pendconn *pc, *pc_bck, *pc_end;
-       struct session *sess;
        int xferred;
 
        if (s->health == s->rise) {
+               int srv_was_paused = s->state & SRV_GOINGDOWN;
 
                s->last_change = now.tv_sec;
-               s->state &= ~SRV_RUNNING;
+               s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN);
                s->proxy->lbprm.set_server_status_down(s);
 
                /* we might have sessions queued on this server and waiting for
                 * a connection. Those which are redispatchable will be queued
                 * to another server or to the proxy itself.
                 */
-               xferred = 0;
-               FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) {
-                       sess = pc->sess;
-                       if ((sess->be->options & PR_O_REDISP)) {
-                               /* The REDISP option was specified. We will ignore
-                                * cookie and force to balance or use the dispatcher.
-                                */
-                               sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET);
-                               sess->srv = NULL; /* it's left to the dispatcher to choose a server */
-                               http_flush_cookie_flags(&sess->txn);
-                               pendconn_free(pc);
-                               task_wakeup(sess->task);
-                               xferred++;
-                       }
-               }
-
+               xferred = redistribute_pending(s);
                sprintf(trash, "%sServer %s/%s is DOWN. %d active and %d backup servers left.%s"
                        " %d sessions active, %d requeued, %d remaining in queue.\n",
                        s->state & SRV_BACKUP ? "Backup " : "",
@@ -91,15 +138,16 @@ static void set_server_down(struct server *s)
                        s->cur_sess, xferred, s->nbpend);
 
                Warning("%s", trash);
-               send_log(s->proxy, LOG_ALERT, "%s", trash);
 
-               if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
-                       s->proxy->last_change = now.tv_sec;
-                       s->proxy->down_trans++;
+               /* we don't send an alert if the server was previously paused */
+               if (srv_was_paused)
+                       send_log(s->proxy, LOG_NOTICE, "%s", trash);
+               else
+                       send_log(s->proxy, LOG_ALERT, "%s", trash);
+
+               if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+                       set_backend_down(s->proxy);
 
-                       Alert("%s '%s' has no server available!\n", proxy_type_str(s->proxy), s->proxy->id);
-                       send_log(s->proxy, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(s->proxy), s->proxy->id);
-               }
                s->down_trans++;
        }
        s->health = 0; /* failure */
@@ -260,6 +308,12 @@ static int event_srv_chk_r(int fd)
                /* check the reply : HTTP/1.X 2xx and 3xx are OK */
                if (trash[9] == '2' || trash[9] == '3')
                        s->result |= SRV_CHK_RUNNING;
+               else if ((s->proxy->options & PR_O_DISABLE404) &&
+                        (s->state & SRV_RUNNING) &&
+                        (memcmp(&trash[9], "404", 3) == 0)) {
+                       /* 404 may be accepted as "stopping" only if the server was up */
+                       s->result |= SRV_CHK_RUNNING | SRV_CHK_DISABLE;
+               }
                else
                        s->result |= SRV_CHK_ERROR;
        }
@@ -301,6 +355,7 @@ void process_chk(struct task *t, struct timeval *next)
        __label__ new_chk, out;
        struct server *s = t->context;
        struct sockaddr_in sa;
+       int xferred;
        int fd;
        int rv;
 
@@ -475,12 +530,63 @@ void process_chk(struct task *t, struct timeval *next)
                if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */
                        //fprintf(stderr, "process_chk: 9\n");
 
+                       /* we may have to add/remove this server from the LB group */
+                       if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) {
+                               if ((s->state & SRV_GOINGDOWN) &&
+                                   ((s->result & (SRV_CHK_RUNNING|SRV_CHK_DISABLE)) == SRV_CHK_RUNNING)) {
+                                       /* server enabled again */
+                                       s->state &= ~SRV_GOINGDOWN;
+                                       s->proxy->lbprm.set_server_status_up(s);
+
+                                       /* check if we can handle some connections queued at the proxy. We
+                                        * will take as many as we can handle.
+                                        */
+                                       xferred = check_for_pending(s);
+
+                                       sprintf(trash,
+                                               "Load-balancing on %sServer %s/%s is enabled again. %d active and %d backup servers online.%s"
+                                               " %d sessions requeued, %d total in queue.\n",
+                                               s->state & SRV_BACKUP ? "Backup " : "",
+                                               s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck,
+                                               (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+                                               xferred, s->nbpend);
+
+                                       Warning("%s", trash);
+                                       send_log(s->proxy, LOG_NOTICE, "%s", trash);
+                               }
+                               else if (!(s->state & SRV_GOINGDOWN) &&
+                                        ((s->result & (SRV_CHK_RUNNING | SRV_CHK_DISABLE)) ==
+                                         (SRV_CHK_RUNNING | SRV_CHK_DISABLE))) {
+                                       /* server disabled */
+                                       s->state |= SRV_GOINGDOWN;
+                                       s->proxy->lbprm.set_server_status_down(s);
+
+                                       /* we might have sessions queued on this server and waiting for
+                                        * a connection. Those which are redispatchable will be queued
+                                        * to another server or to the proxy itself.
+                                        */
+                                       xferred = redistribute_pending(s);
+
+                                       sprintf(trash,
+                                               "Load-balancing on %sServer %s/%s is disabled. %d active and %d backup servers online.%s"
+                                               " %d sessions requeued, %d total in queue.\n",
+                                               s->state & SRV_BACKUP ? "Backup " : "",
+                                               s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck,
+                                               (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+                                               xferred, s->nbpend);
+
+                                       Warning("%s", trash);
+
+                                       send_log(s->proxy, LOG_NOTICE, "%s", trash);
+                                       if (!s->proxy->srv_bck && !s->proxy->srv_act)
+                                               set_backend_down(s->proxy);
+                               }
+                       }
+
                        if (s->health < s->rise + s->fall - 1) {
                                s->health++; /* was bad, stays for a while */
 
                                if (s->health == s->rise) {
-                                       int xferred;
-
                                        if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) {
                                                if (s->proxy->last_change < now.tv_sec)         // ignore negative times
                                                        s->proxy->down_time += now.tv_sec - s->proxy->last_change;
@@ -497,18 +603,7 @@ void process_chk(struct task *t, struct timeval *next)
                                        /* check if we can handle some connections queued at the proxy. We
                                         * will take as many as we can handle.
                                         */
-                                       for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) {
-                                               struct session *sess;
-                                               struct pendconn *p;
-
-                                               p = pendconn_from_px(s->proxy);
-                                               if (!p)
-                                                       break;
-                                               p->sess->srv = s;
-                                               sess = p->sess;
-                                               pendconn_free(p);
-                                               task_wakeup(sess->task);
-                                       }
+                                       xferred = check_for_pending(s);
 
                                        sprintf(trash,
                                                "%sServer %s/%s is UP. %d active and %d backup servers online.%s"