From: Willy Tarreau Date: Fri, 30 Nov 2007 09:41:39 +0000 (+0100) Subject: [MEDIUM] implement "http-check disable-on-404" for graceful shutdown X-Git-Tag: v1.3.14~32 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=48494c0c5c4d08492522aa00296143c6b6a72943;p=thirdparty%2Fhaproxy.git [MEDIUM] implement "http-check disable-on-404" for graceful shutdown When an HTTP server returns "404 not found", it indicates that at least part of it is still running. For this reason, it can be convenient for application administrators to be able to consider code 404 as valid, but for a server which does not want to participate in load balancing anymore. This is useful to seamlessly exclude a server from a farm without acting on the load balancer. For instance, let's consider that haproxy checks for the "/alive" file. To enable load balancing on a server, the admin would simply do : # touch /var/www/alive And to disable the server, he would simply do : # rm /var/www/alive Another immediate gain from doing this is that it is now possible to send NOTICE messages instead of ALERT messages when a server is first disabled, then goes down. This provides a graceful shutdown method. To enable this behaviour, specify "http-check disable-on-404" in the backend. 
--- diff --git a/include/types/proxy.h b/include/types/proxy.h index 2a75fad9e6..fd48e707ff 100644 --- a/include/types/proxy.h +++ b/include/types/proxy.h @@ -102,6 +102,7 @@ #define PR_O_TCPSPLICE 0x08000000 /* delegate data transfer to linux kernel's tcp_splice */ #define PR_O_CONTSTATS 0x10000000 /* continous counters */ #define PR_O_HTTP_PROXY 0x20000000 /* Enable session to use HTTP proxy operations */ +#define PR_O_DISABLE404 0x40000000 /* Disable a server on a 404 response to a health-check */ /* This structure is used to apply fast weighted round robin on a server group */ struct fwrr_group { diff --git a/include/types/server.h b/include/types/server.h index 47c152c3d6..b50e1936d8 100644 --- a/include/types/server.h +++ b/include/types/server.h @@ -41,6 +41,7 @@ #define SRV_MAPPORTS 0x0004 /* this server uses mapped ports */ #define SRV_BIND_SRC 0x0008 /* this server uses a specific source address */ #define SRV_CHECKED 0x0010 /* this server needs to be checked */ +#define SRV_GOINGDOWN 0x0020 /* this server says that it's going down (404) */ #define SRV_TPROXY_ADDR 0x0020 /* bind to this non-local address to reach this server */ #define SRV_TPROXY_CIP 0x0040 /* bind to the client's IP address to reach this server */ diff --git a/src/backend.c b/src/backend.c index a8676b3faa..cc12f7ad62 100644 --- a/src/backend.c +++ b/src/backend.c @@ -60,6 +60,8 @@ static inline int srv_is_usable(int state, int weight) { if (!weight) return 0; + if (state & SRV_GOINGDOWN) + return 0; if (!(state & SRV_RUNNING)) return 0; return 1; @@ -193,7 +195,8 @@ void recalc_server_map(struct proxy *px) int max = 0; best = NULL; for (cur = px->srv; cur; cur = cur->next) { - if ((cur->state & (SRV_RUNNING | SRV_BACKUP)) == flag) { + if (flag == (cur->state & + (SRV_RUNNING | SRV_GOINGDOWN | SRV_BACKUP))) { int v; /* If we are forced to return only one server, we don't want to diff --git a/src/cfgparse.c b/src/cfgparse.c index affb9569a8..f283332b4e 100644 --- a/src/cfgparse.c +++ 
b/src/cfgparse.c @@ -1292,6 +1292,19 @@ int cfg_parse_listen(const char *file, int linenum, char **args) /* enable reconnections to dispatch */ curproxy->options |= PR_O_REDISP; } + else if (!strcmp(args[0], "http-check")) { + if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL)) + return 0; + + if (strcmp(args[1], "disable-on-404") == 0) { + /* enable a graceful server shutdown on an HTTP 404 response */ + curproxy->options |= PR_O_DISABLE404; + } + else { + Alert("parsing [%s:%d] : '%s' only supports 'disable-on-404'.\n", file, linenum, args[0]); + return -1; + } + } #ifdef TPROXY else if (!strcmp(args[0], "transparent")) { /* enable transparent proxy connections */ @@ -2525,6 +2538,11 @@ int readcfgfile(const char *file) Warning("parsing %s : Layer 7 hash not possible for %s '%s'. Falling back to round robin.\n", file, proxy_type_str(curproxy), curproxy->id); } + if (curproxy->options & PR_O_DISABLE404) { + curproxy->options &= ~PR_O_DISABLE404; + Warning("parsing %s : '%s' will be ignored for %s '%s' (requires HTTP mode).\n", + file, "disable-on-404", proxy_type_str(curproxy), curproxy->id); + } } if (curproxy->mode == PR_MODE_HEALTH) { /* TCP PROXY or HEALTH CHECK */ @@ -2540,6 +2558,11 @@ int readcfgfile(const char *file) file, curproxy->id); cfgerr++; } + if ((curproxy->options & PR_O_DISABLE404) && !(curproxy->options & PR_O_HTTP_CHK)) { + curproxy->options &= ~PR_O_DISABLE404; + Warning("parsing %s : '%s' will be ignored for %s '%s' (requires 'option httpchk').\n", + file, "disable-on-404", proxy_type_str(curproxy), curproxy->id); + } } /* if a default backend was specified, let's find it */ diff --git a/src/checks.c b/src/checks.c index e02b8914ff..d3aa6c8ce1 100644 --- a/src/checks.c +++ b/src/checks.c @@ -45,6 +45,69 @@ #include #endif +/* sends a log message when a backend goes down, and also sets last + * change date. 
+ */ +static void set_backend_down(struct proxy *be) +{ + be->last_change = now.tv_sec; + be->down_trans++; + + Alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id); + send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id); +} + +/* Redistribute pending connections when a server goes down. The number of + * connections redistributed is returned. + */ +static int redistribute_pending(struct server *s) +{ + struct pendconn *pc, *pc_bck, *pc_end; + int xferred = 0; + + FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) { + struct session *sess = pc->sess; + if (sess->be->options & PR_O_REDISP) { + /* The REDISP option was specified. We will ignore + * cookie and force to balance or use the dispatcher. + */ + sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET); + sess->srv = NULL; /* it's left to the dispatcher to choose a server */ + http_flush_cookie_flags(&sess->txn); + pendconn_free(pc); + task_wakeup(sess->task); + xferred++; + } + } + return xferred; +} + +/* Check for pending connections at the backend, and assign some of them to + * the server coming up. The server's weight is checked before being assigned + * connections it may not be able to handle. The total number of transferred + * connections is returned. 
+ */ +static int check_for_pending(struct server *s) +{ + int xferred; + + if (!s->eweight) + return 0; + + for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) { + struct session *sess; + struct pendconn *p; + + p = pendconn_from_px(s->proxy); + if (!p) + break; + p->sess->srv = s; + sess = p->sess; + pendconn_free(p); + task_wakeup(sess->task); + } + return xferred; +} /* Sets server down, notifies by all available means, recounts the * remaining servers on the proxy and transfers queued sessions whenever @@ -53,36 +116,20 @@ */ static void set_server_down(struct server *s) { - struct pendconn *pc, *pc_bck, *pc_end; - struct session *sess; int xferred; if (s->health == s->rise) { + int srv_was_paused = s->state & SRV_GOINGDOWN; s->last_change = now.tv_sec; - s->state &= ~SRV_RUNNING; + s->state &= ~(SRV_RUNNING | SRV_GOINGDOWN); s->proxy->lbprm.set_server_status_down(s); /* we might have sessions queued on this server and waiting for * a connection. Those which are redispatchable will be queued * to another server or to the proxy itself. */ - xferred = 0; - FOREACH_ITEM_SAFE(pc, pc_bck, &s->pendconns, pc_end, struct pendconn *, list) { - sess = pc->sess; - if ((sess->be->options & PR_O_REDISP)) { - /* The REDISP option was specified. We will ignore - * cookie and force to balance or use the dispatcher. - */ - sess->flags &= ~(SN_DIRECT | SN_ASSIGNED | SN_ADDR_SET); - sess->srv = NULL; /* it's left to the dispatcher to choose a server */ - http_flush_cookie_flags(&sess->txn); - pendconn_free(pc); - task_wakeup(sess->task); - xferred++; - } - } - + xferred = redistribute_pending(s); sprintf(trash, "%sServer %s/%s is DOWN. %d active and %d backup servers left.%s" " %d sessions active, %d requeued, %d remaining in queue.\n", s->state & SRV_BACKUP ? 
"Backup " : "", @@ -91,15 +138,16 @@ static void set_server_down(struct server *s) s->cur_sess, xferred, s->nbpend); Warning("%s", trash); - send_log(s->proxy, LOG_ALERT, "%s", trash); - if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) { - s->proxy->last_change = now.tv_sec; - s->proxy->down_trans++; + /* we don't send an alert if the server was previously paused */ + if (srv_was_paused) + send_log(s->proxy, LOG_NOTICE, "%s", trash); + else + send_log(s->proxy, LOG_ALERT, "%s", trash); + + if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) + set_backend_down(s->proxy); - Alert("%s '%s' has no server available!\n", proxy_type_str(s->proxy), s->proxy->id); - send_log(s->proxy, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(s->proxy), s->proxy->id); - } s->down_trans++; } s->health = 0; /* failure */ @@ -260,6 +308,12 @@ static int event_srv_chk_r(int fd) /* check the reply : HTTP/1.X 2xx and 3xx are OK */ if (trash[9] == '2' || trash[9] == '3') s->result |= SRV_CHK_RUNNING; + else if ((s->proxy->options & PR_O_DISABLE404) && + (s->state & SRV_RUNNING) && + (memcmp(&trash[9], "404", 3) == 0)) { + /* 404 may be accepted as "stopping" only if the server was up */ + s->result |= SRV_CHK_RUNNING | SRV_CHK_DISABLE; + } else s->result |= SRV_CHK_ERROR; } @@ -301,6 +355,7 @@ void process_chk(struct task *t, struct timeval *next) __label__ new_chk, out; struct server *s = t->context; struct sockaddr_in sa; + int xferred; int fd; int rv; @@ -475,12 +530,63 @@ void process_chk(struct task *t, struct timeval *next) if ((s->result & (SRV_CHK_ERROR|SRV_CHK_RUNNING)) == SRV_CHK_RUNNING) { /* good server detected */ //fprintf(stderr, "process_chk: 9\n"); + /* we may have to add/remove this server from the LB group */ + if ((s->state & SRV_RUNNING) && (s->proxy->options & PR_O_DISABLE404)) { + if ((s->state & SRV_GOINGDOWN) && + ((s->result & (SRV_CHK_RUNNING|SRV_CHK_DISABLE)) == SRV_CHK_RUNNING)) { + /* server enabled again */ + s->state &= ~SRV_GOINGDOWN; + 
s->proxy->lbprm.set_server_status_up(s); + + /* check if we can handle some connections queued at the proxy. We + * will take as many as we can handle. + */ + xferred = check_for_pending(s); + + sprintf(trash, + "Load-balancing on %sServer %s/%s is enabled again. %d active and %d backup servers online.%s" + " %d sessions requeued, %d total in queue.\n", + s->state & SRV_BACKUP ? "Backup " : "", + s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck, + (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "", + xferred, s->nbpend); + + Warning("%s", trash); + send_log(s->proxy, LOG_NOTICE, "%s", trash); + } + else if (!(s->state & SRV_GOINGDOWN) && + ((s->result & (SRV_CHK_RUNNING | SRV_CHK_DISABLE)) == + (SRV_CHK_RUNNING | SRV_CHK_DISABLE))) { + /* server disabled */ + s->state |= SRV_GOINGDOWN; + s->proxy->lbprm.set_server_status_down(s); + + /* we might have sessions queued on this server and waiting for + * a connection. Those which are redispatchable will be queued + * to another server or to the proxy itself. + */ + xferred = redistribute_pending(s); + + sprintf(trash, + "Load-balancing on %sServer %s/%s is disabled. %d active and %d backup servers online.%s" + " %d sessions requeued, %d total in queue.\n", + s->state & SRV_BACKUP ? "Backup " : "", + s->proxy->id, s->id, s->proxy->srv_act, s->proxy->srv_bck, + (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." 
: "", + xferred, s->nbpend); + + Warning("%s", trash); + + send_log(s->proxy, LOG_NOTICE, "%s", trash); + if (!s->proxy->srv_bck && !s->proxy->srv_act) + set_backend_down(s->proxy); + } + } + if (s->health < s->rise + s->fall - 1) { s->health++; /* was bad, stays for a while */ if (s->health == s->rise) { - int xferred; - if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) { if (s->proxy->last_change < now.tv_sec) // ignore negative times s->proxy->down_time += now.tv_sec - s->proxy->last_change; @@ -497,18 +603,7 @@ void process_chk(struct task *t, struct timeval *next) /* check if we can handle some connections queued at the proxy. We * will take as many as we can handle. */ - for (xferred = 0; !s->maxconn || xferred < srv_dynamic_maxconn(s); xferred++) { - struct session *sess; - struct pendconn *p; - - p = pendconn_from_px(s->proxy); - if (!p) - break; - p->sess->srv = s; - sess = p->sess; - pendconn_free(p); - task_wakeup(sess->task); - } + xferred = check_for_pending(s); sprintf(trash, "%sServer %s/%s is UP. %d active and %d backup servers online.%s"