From: Simon Horman Date: Mon, 25 Nov 2013 01:46:36 +0000 (+0900) Subject: MEDIUM: checks: Add supplementary agent checks X-Git-Tag: v1.5-dev20~215 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d60d69138bd102243709d76a005c10dddab2bcb2;p=thirdparty%2Fhaproxy.git MEDIUM: checks: Add supplementary agent checks Allow an auxiliary agent check to be run independently of a regular health check. This is enabled by the agent-check server setting. The agent-port, which specifies the TCP port to use for the agent's connections, is required. The agent-inter, which specifies the interval between agent checks and timeout of agent checks, is optional. If not set the value for regular checks is used. e.g. server web1_1 127.0.0.1:80 check agent-check agent-port 10000 If either the health or agent check determines that a server is down then it is marked as being down, otherwise it is marked as being up. An agent health check is performed by opening a TCP socket and reading an ASCII string. The string should have one of the following forms: * An ASCII representation of a positive integer percentage. e.g. "75%" Values in this format will set the weight proportional to the initial weight of a server as configured when haproxy starts. * The string "drain". This will cause the weight of a server to be set to 0, and thus it will not accept any new connections other than those that are accepted via persistence. * The string "down", optionally followed by a description string. Mark the server as down and log the description string as the reason. * The string "stopped", optionally followed by a description string. This currently has the same behaviour as "down". * The string "fail", optionally followed by a description string. This currently has the same behaviour as "down". 
Signed-off-by: Simon Horman --- diff --git a/doc/configuration.txt b/doc/configuration.txt index 75b77c321d..25fbf8f64c 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -777,11 +777,12 @@ nosplice "option splice-response". spread-checks <0..50, in percent> - Sometimes it is desirable to avoid sending health checks to servers at exact - intervals, for instance when many logical servers are located on the same - physical server. With the help of this parameter, it becomes possible to add - some randomness in the check interval between 0 and +/- 50%. A value between - 2 and 5 seems to show good results. The default value remains at 0. + Sometimes it is desirable to avoid sending agent and health checks to + servers at exact intervals, for instance when many logical servers are + located on the same physical server. With the help of this parameter, it + becomes possible to add some randomness in the check interval between 0 + and +/- 50%. A value between 2 and 5 seems to show good results. The + default value remains at 0. tune.bufsize Sets the buffer size to this size (in bytes). Lower values allow more @@ -7669,6 +7670,66 @@ addr Supported in default-server: No +agent-check + Enable an auxiliary agent check which is run independently of a regular + health check. An agent health check is performed by making a TCP + connection to the port set by the "agent-port" parameter and reading + an ASCII string. The string should have one of the following forms: + + * An ASCII representation of a positive integer percentage. + e.g. "75%" + + Values in this format will set the weight proportional to the initial + weight of a server as configured when haproxy starts. + + * The string "drain". + + This will cause the weight of a server to be set to 0, and thus it will + not accept any new connections other than those that are accepted via + persistence. + + * The string "down", optionally followed by a description string. 
+ + Mark the server as down and log the description string as the reason. + + * The string "stopped", optionally followed by a description string. + + This currently has the same behaviour as "down". + + * The string "fail", optionally followed by a description string. + + This currently has the same behaviour as "down". + + Requires the "agent-port" parameter to be set. + See also the "agent-inter" parameter. + + Supported in default-server: No + +agent-inter <delay> + The "agent-inter" parameter sets the interval between two agent checks + to <delay> milliseconds. If left unspecified, the delay defaults to 2000 ms. + + Just as with every other time-based parameter, it may be entered in any + other explicit unit among { us, ms, s, m, h, d }. The "agent-inter" + parameter also serves as a timeout for agent checks if "timeout check" is + not set. In order to reduce "resonance" effects when multiple servers are + hosted on the same hardware, the agent and health checks of all servers + are started with a small time offset between them. It is also possible to + add some random noise in the agent and health checks interval using the + global "spread-checks" keyword. This makes sense for instance when a lot + of backends use the same servers. + + See also the "agent-check" and "agent-port" parameters. + + Supported in default-server: Yes + +agent-port + The "agent-port" parameter sets the TCP port used for agent checks. + + See also the "agent-check" and "agent-inter" parameters. + + Supported in default-server: Yes + backup When "backup" is present on a server line, the server is only used in load balancing when all other non-backup servers are unavailable. Requests coming @@ -7844,11 +7905,11 @@ downinter other explicit unit among { us, ms, s, m, h, d }. The "inter" parameter also serves as a timeout for health checks sent to servers if "timeout check" is not set. 
In order to reduce "resonance" effects when multiple servers are - hosted on the same hardware, the health-checks of all servers are started - with a small time offset between them. It is also possible to add some random - noise in the health checks interval using the global "spread-checks" - keyword. This makes sense for instance when a lot of backends use the same - servers. + hosted on the same hardware, the agent and health checks of all servers + are started with a small time offset between them. It is also possible to + add some random noise in the agent and health checks interval using the + global "spread-checks" keyword. This makes sense for instance when a lot + of backends use the same servers. Supported in default-server: Yes diff --git a/include/types/server.h b/include/types/server.h index 1df56e9606..73d426d103 100644 --- a/include/types/server.h +++ b/include/types/server.h @@ -55,6 +55,9 @@ /* unused: 0x0100, 0x0200, 0x0400 */ #define SRV_SEND_PROXY 0x0800 /* this server talks the PROXY protocol */ #define SRV_NON_STICK 0x1000 /* never add connections allocated to this server to a stick table */ +#define SRV_AGENT_CHECKED 0x2000 /* this server needs to be checked using an agent check. + * This is run independently of the main check whose + * presence is indicated by the SRV_CHECKED flag */ /* function which act on servers need to return various errors */ #define SRV_STATUS_OK 0 /* everything is OK. 
*/ @@ -190,6 +193,7 @@ struct server { } check_common; struct check check; /* health-check specific configuration */ + struct check agent; /* agent specific configuration */ #ifdef USE_OPENSSL int use_ssl; /* ssl enabled */ diff --git a/src/cfgparse.c b/src/cfgparse.c index 724b434b03..7df7de0da8 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -1325,9 +1325,13 @@ void init_default_instance() defproxy.defsrv.check.inter = DEF_CHKINTR; defproxy.defsrv.check.fastinter = 0; defproxy.defsrv.check.downinter = 0; + defproxy.defsrv.agent.inter = DEF_CHKINTR; + defproxy.defsrv.agent.fastinter = 0; + defproxy.defsrv.agent.downinter = 0; defproxy.defsrv.rise = DEF_RISETIME; defproxy.defsrv.fall = DEF_FALLTIME; defproxy.defsrv.check.port = 0; + defproxy.defsrv.agent.port = 0; defproxy.defsrv.maxqueue = 0; defproxy.defsrv.minconn = 0; defproxy.defsrv.maxconn = 0; @@ -4172,7 +4176,7 @@ stats_error_parsing: else if (!strcmp(args[0], "server") || !strcmp(args[0], "default-server")) { /* server address */ int cur_arg; short realport = 0; - int do_check = 0, defsrv = (*args[0] == 'd'); + int do_agent = 0, do_check = 0, defsrv = (*args[0] == 'd'); if (!defsrv && curproxy == &defproxy) { Alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]); @@ -4219,6 +4223,7 @@ stats_error_parsing: LIST_INIT(&newsrv->actconns); LIST_INIT(&newsrv->pendconns); do_check = 0; + do_agent = 0; newsrv->state = SRV_RUNNING; /* early server setup */ newsrv->last_change = now.tv_sec; newsrv->id = strdup(args[1]); @@ -4272,11 +4277,16 @@ stats_error_parsing: goto out; } - newsrv->check.use_ssl = curproxy->defsrv.check.use_ssl; + newsrv->check.use_ssl = curproxy->defsrv.check.use_ssl; newsrv->check.port = curproxy->defsrv.check.port; newsrv->check.inter = curproxy->defsrv.check.inter; newsrv->check.fastinter = curproxy->defsrv.check.fastinter; newsrv->check.downinter = curproxy->defsrv.check.downinter; + newsrv->agent.use_ssl = curproxy->defsrv.agent.use_ssl; + 
newsrv->agent.port = curproxy->defsrv.agent.port; + newsrv->agent.inter = curproxy->defsrv.agent.inter; + newsrv->agent.fastinter = curproxy->defsrv.agent.fastinter; + newsrv->agent.downinter = curproxy->defsrv.agent.downinter; newsrv->rise = curproxy->defsrv.rise; newsrv->fall = curproxy->defsrv.fall; newsrv->maxqueue = curproxy->defsrv.maxqueue; @@ -4296,6 +4306,10 @@ stats_error_parsing: newsrv->check.health = newsrv->rise; /* up, but will fall down at first failure */ newsrv->check.server = newsrv; + newsrv->agent.status = HCHK_STATUS_INI; + newsrv->agent.health = newsrv->rise; /* up, but will fall down at first failure */ + newsrv->agent.server = newsrv; + cur_arg = 3; } else { newsrv = &curproxy->defsrv; @@ -4303,7 +4317,33 @@ stats_error_parsing: } while (*args[cur_arg]) { - if (!defsrv && !strcmp(args[cur_arg], "cookie")) { + if (!strcmp(args[cur_arg], "agent-check")) { + global.maxsock++; + do_agent = 1; + cur_arg += 1; + } else if (!strcmp(args[cur_arg], "agent-inter")) { + const char *err = parse_time_err(args[cur_arg + 1], &val, TIME_UNIT_MS); + if (err) { + Alert("parsing [%s:%d] : unexpected character '%c' in 'agent-inter' argument of server %s.\n", + file, linenum, *err, newsrv->id); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + if (val <= 0) { + Alert("parsing [%s:%d]: invalid value %d for argument '%s' of server %s.\n", + file, linenum, val, args[cur_arg], newsrv->id); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + newsrv->agent.inter = val; + cur_arg += 2; + } + else if (!strcmp(args[cur_arg], "agent-port")) { + global.maxsock++; + newsrv->agent.port = atol(args[cur_arg + 1]); + cur_arg += 2; + } + else if (!defsrv && !strcmp(args[cur_arg], "cookie")) { newsrv->cookie = strdup(args[cur_arg + 1]); newsrv->cklen = strlen(args[cur_arg + 1]); cur_arg += 2; @@ -4331,6 +4371,8 @@ stats_error_parsing: if (newsrv->check.health) newsrv->check.health = newsrv->rise; + if (newsrv->agent.health) + newsrv->agent.health = newsrv->rise; cur_arg 
+= 2; } else if (!strcmp(args[cur_arg], "fall")) { @@ -4512,6 +4554,7 @@ stats_error_parsing: newsrv->state |= SRV_MAINTAIN; newsrv->state &= ~SRV_RUNNING; newsrv->check.health = 0; + newsrv->agent.health = 0; cur_arg += 1; } else if (!defsrv && !strcmp(args[cur_arg], "observe")) { @@ -4913,6 +4956,28 @@ stats_error_parsing: newsrv->state |= SRV_CHECKED; } + if (do_agent) { + int ret; + + if (!newsrv->agent.port) { + Alert("parsing [%s:%d] : server %s does not have agent port. Agent check has been disabled.\n", + file, linenum, newsrv->id); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + if (!newsrv->agent.inter) + newsrv->agent.inter = newsrv->check.inter; + + ret = init_check(&newsrv->agent, PR_O2_LB_AGENT_CHK, file, linenum); + if (ret) { + err_code |= ret; + goto out; + } + + newsrv->state |= SRV_AGENT_CHECKED; + } + if (!defsrv) { if (newsrv->state & SRV_BACKUP) curproxy->srv_bck++; @@ -6802,6 +6867,7 @@ out_uri_auth_compat: newsrv->state |= SRV_MAINTAIN; newsrv->state &= ~SRV_RUNNING; newsrv->check.health = 0; + newsrv->agent.health = 0; } newsrv->track = srv; diff --git a/src/checks.c b/src/checks.c index d865c0b2ff..cffba0210b 100644 --- a/src/checks.c +++ b/src/checks.c @@ -398,7 +398,7 @@ void set_server_down(struct check *check) check->health = s->rise; } - if (check->health == s->rise || s->track) { + if ((s->state & SRV_RUNNING && check->health == s->rise) || s->track) { int srv_was_paused = s->state & SRV_GOINGDOWN; int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act; @@ -465,7 +465,8 @@ void set_server_up(struct check *check) { check->health = s->rise; } - if (check->health == s->rise || s->track) { + if ((s->check.health >= s->rise && s->agent.health >= s->rise && + check->health == s->rise) || s->track) { if (s->proxy->srv_bck == 0 && s->proxy->srv_act == 0) { if (s->proxy->last_change < now.tv_sec) // ignore negative times s->proxy->down_time += now.tv_sec - s->proxy->last_change; @@ -1314,8 +1315,11 @@ static struct task 
*process_chk(struct task *t) check->bo->p = check->bo->data; check->bo->o = 0; - /* prepare the check buffer */ - if (check->type) { + /* prepare the check buffer + * This should not be used if check is the secondary agent check + * of a server as s->proxy->check_req will relate to the + * configuration of the primary check */ + if (check->type && check != &s->agent) { bo_putblk(check->bo, s->proxy->check_req, s->proxy->check_len); /* we want to check if this host replies to HTTP or SSLv3 requests @@ -1584,12 +1588,20 @@ int start_checks() { */ for (px = proxy; px; px = px->next) { for (s = px->srv; s; s = s->next) { - if (!(s->state & SRV_CHECKED)) - continue; + /* A task for the main check */ + if (s->state & SRV_CHECKED) { + if (!start_check_task(&s->check, mininter, nbcheck, srvpos)) + return -1; + srvpos++; + } - if (!start_check_task(&s->check, mininter, nbcheck, srvpos)) - return -1; - srvpos++; + /* A task for a auxiliary agent check */ + if (s->state & SRV_AGENT_CHECKED) { + if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) { + return -1; + } + srvpos++; + } } } return 0; diff --git a/src/haproxy.c b/src/haproxy.c index bc03a73b8e..e03219aa2c 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -1120,6 +1120,10 @@ void deinit(void) task_delete(s->check.task); task_free(s->check.task); } + if (s->agent.task) { + task_delete(s->agent.task); + task_free(s->agent.task); + } if (s->warmup) { task_delete(s->warmup); @@ -1130,6 +1134,8 @@ void deinit(void) free(s->cookie); free(s->check.bi); free(s->check.bo); + free(s->agent.bi); + free(s->agent.bo); free(s); s = s_next; }/* end while(s) */