From: David Hankins Date: Tue, 18 Dec 2007 18:04:22 +0000 (+0000) Subject: - A bug in failover pool rebalancing that caused POOLREQ message ping-pongs X-Git-Tag: v4_1_0a1~49 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f71bc98898bc8fc77d916b8658ae9324471cb273;p=thirdparty%2Fdhcp.git - A bug in failover pool rebalancing that caused POOLREQ message ping-pongs was repaired. [ISC-Bugs #17228] - A flaw in failover pool rebalancing that could cause POOLREQ messages to be sent outside of the min-balance/max-balance scheduled intervals has been repaired. [ISC-Bugs #17228] --- diff --git a/RELNOTES b/RELNOTES index eedc82006..c5019f049 100644 --- a/RELNOTES +++ b/RELNOTES @@ -69,6 +69,13 @@ suggested fixes to . - Fixed definition of the iaaddr hash functions to use the correct functions when referencing and dereferencing memory. +- A bug in failover pool rebalancing that caused POOLREQ message ping-pongs + was repaired. + +- A flaw in failover pool rebalancing that could cause POOLREQ messages to + be sent outside of the min-balance/max-balance scheduled intervals has + been repaired. + Changes since 4.0.0b3 - The reverse dns name for PTR updates on IPv6 addresses has been fixed to diff --git a/server/failover.c b/server/failover.c index ced3447ce..fe4fe4818 100644 --- a/server/failover.c +++ b/server/failover.c @@ -49,7 +49,8 @@ static isc_result_t failover_message_dereference (failover_message_t **, static void dhcp_failover_pool_balance(dhcp_failover_state_t *state); static void dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state); -static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state); +static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state, + isc_boolean_t *sendreq); static INLINE int secondary_not_hoarding(dhcp_failover_state_t *state, struct pool *p); @@ -2234,7 +2235,11 @@ isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state, return ISC_R_SUCCESS; } -/* Balance operation manual entry. */ +/* + * Balance operation manual entry; startup, entrance to normal state. No + * sense sending a POOLREQ at this stage; the peer is likely about to schedule + * their own rebalance event upon entering normal themselves. + */ static void dhcp_failover_pool_balance(dhcp_failover_state_t *state) { @@ -2242,25 +2247,36 @@ dhcp_failover_pool_balance(dhcp_failover_state_t *state) cancel_timeout(dhcp_failover_pool_rebalance, state); state->sched_balance = 0; - dhcp_failover_pool_dobalance(state); + dhcp_failover_pool_dobalance(state, NULL); } -/* Balance operation entry from timer event. */ +/* + * Balance operation entry from timer event. Once per timer interval is + * the only time we want to emit POOLREQs (asserting an interrupt in our + * peer). + */ void dhcp_failover_pool_rebalance(void *failover_state) { dhcp_failover_state_t *state; + isc_boolean_t sendreq = ISC_FALSE; state = (dhcp_failover_state_t *)failover_state; /* Clear scheduled event indicator. */ state->sched_balance = 0; - if (dhcp_failover_pool_dobalance(state)) + if (dhcp_failover_pool_dobalance(state, &sendreq)) dhcp_failover_send_updates(state); + + if (sendreq) + dhcp_failover_send_poolreq(state); } -/* Balance operation entry from POOLREQ protocol message. */ +/* + * Balance operation entry from POOLREQ protocol message. Do not permit a + * POOLREQ to send back a POOLREQ. Ping pong. + */ static void dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state) { @@ -2270,7 +2286,7 @@ dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state) cancel_timeout(dhcp_failover_pool_rebalance, state); state->sched_balance = 0; - queued = dhcp_failover_pool_dobalance(state); + queued = dhcp_failover_pool_dobalance(state, NULL); dhcp_failover_send_poolresp(state, queued); @@ -2282,12 +2298,19 @@ dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state) state->name); } -/* Do the meat of the work common to all forms of pool rebalance. */ -static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state) +/* + * Do the meat of the work common to all forms of pool rebalance. If the + * caller deems it appropriate to transmit POOLREQ messages, it can use the + * sendreq pointer to pass in the address of a FALSE value which this function + * will conditionally turn TRUE if a POOLREQ is determined to be necessary. + * A NULL value may be passed, in which case no action is taken. + */ +static int +dhcp_failover_pool_dobalance(dhcp_failover_state_t *state, + isc_boolean_t *sendreq) { - int lts, total, thresh, hold, pass; + int lts, total, thresh, hold, panic, pass; int leases_queued = 0; - int reqsent = 0; struct lease *lp = (struct lease *)0; struct lease *next = (struct lease *)0; struct shared_network *s; @@ -2296,7 +2319,7 @@ static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state) binding_state_t my_lease_state; struct lease **lq; int (*log_func)(const char *, ...); - const char *result; + const char *result, *reqlog; if (state -> me.state != normal) return 0; @@ -2331,21 +2354,37 @@ static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state) thresh = ((total * state->max_lease_misbalance) + 50) / 100; hold = ((total * state->max_lease_ownership) + 50) / 100; + /* + * If we need leases (so lts is negative) more than negative + * double the thresh%, panic and send poolreq to hopefully wake + * up the peer (but more likely the db is inconsistent). But, + * if this comes out zero, switch to -1 so that the POOLREQ is + * sent on lts == -2 rather than right away at -1. + * + * Note that we do not subtract -1 from panic all the time + * because thresh% and hold% may come out to the same number, + * and that is correct operation...where thresh% and hold% are + * both -1, we want to send poolreq when lts reaches -3. So, + * "-3 < -2", lts < panic. + */ + panic = thresh * -2; + + if (panic == 0) + panic = -1; + + if ((sendreq != NULL) && (lts < panic)) { + reqlog = " (requesting peer rebalance!)"; + *sendreq = ISC_TRUE; + } else + reqlog = ""; + log_info("balancing pool %lx %s total %d free %d " - "backup %d lts %d max-own (+/-)%d", + "backup %d lts %d max-own (+/-)%d%s", (unsigned long)p, (p->shared_network ? p->shared_network->name : ""), p->lease_count, - p->free_leases, p->backup_leases, lts, hold); - - /* If lts is in the negatives (we need leases) more than - * negative double the thresh%, panic and send poolreq to - * hopefully wake up the peer. - */ - if (!reqsent && (lts < (thresh * -2))) { - dhcp_failover_send_poolreq(state); - reqsent = 1; - } + p->free_leases, p->backup_leases, lts, hold, + reqlog); /* In the first pass, try to allocate leases to the * peer which it would normally be responsible for (if