git.ipfire.org Git - thirdparty/dhcp.git/commitdiff
- Failover pairs now implement 'MAC Affinity' on leases moving from the
authorDavid Hankins <dhankins@isc.org>
Fri, 16 Jun 2006 19:26:45 +0000 (19:26 +0000)
committerDavid Hankins <dhankins@isc.org>
Fri, 16 Jun 2006 19:26:45 +0000 (19:26 +0000)
  active to free states.  Leases that belonged to the failover secondary
  are moved to BACKUP state rather than FREE upon exiting EXPIRED state.
  If lease rebalancing must move leases, it tries first to move leases
  that belong to the peer in need.
- The server no longer sends POOLREQ messages unless the pool is severely
  misbalanced in the peer's favor (see 'man dhcpd.conf' for more details).
- Pool rebalance events no longer happen upon successfully allocating a
  lease.  Instead, they happen on a schedule.  See 'man dhcpd.conf' for the
  min-balance and max-balance statements for more information.

  [ISC-Bugs #13308]

RELNOTES
common/conflex.c
includes/dhcpd.h
includes/dhctoken.h
includes/failover.h
server/confpars.c
server/dhcp.c
server/dhcpd.conf.5
server/failover.c
server/mdb.c

index 5bfc9406eb4807b35866d0e62bdcff45b86cb42a..0f358ca71a174f070df2f89510c628c2854f4aa8 100644 (file)
--- a/RELNOTES
+++ b/RELNOTES
@@ -106,6 +106,19 @@ and for prodding me into improving it.
 - Some patches to improve DHCP Server startup speed from Andrew Matheson
   have been incorporated.
 
+- Failover pairs now implement 'MAC Affinity' on leases moving from the
+  active to free states.  Leases that belonged to the failover secondary
+  are moved to BACKUP state rather than FREE upon exiting EXPIRED state.
+  If lease rebalancing must move leases, it tries first to move leases
+  that belong to the peer in need.
+
+- The server no longer sends POOLREQ messages unless the pool is severely
+  misbalanced in the peer's favor (see 'man dhcpd.conf' for more details).
+
+- Pool rebalance events no longer happen upon successfully allocating a
+  lease.  Instead, they happen on a schedule.  See 'man dhcpd.conf' for the
+  min-balance and max-balance statements for more information.
+
                        Changes since 3.0.4
 
 - A warning that host statements declared within subnet or shared-network
index db3de04505e0573cbb41e86d9b680b7c205d77e8..33ffad542ae4e13fe89ae11602a98c4ca78ef0a5 100644 (file)
@@ -34,7 +34,7 @@
 
 #ifndef lint
 static char copyright[] =
-"$Id: conflex.c,v 1.99 2006/06/06 16:35:18 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
+"$Id: conflex.c,v 1.100 2006/06/16 19:26:44 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
 #endif /* not lint */
 
 #include "dhcpd.h"
@@ -836,8 +836,16 @@ static enum dhcp_token intern (atom, dfv)
                if (!strncasecmp (atom + 1, "ax", 2)) {
                        if (!atom [3])
                                return TOKEN_MAX;
-                       if (!strcasecmp (atom + 3, "-lease-time"))
-                               return MAX_LEASE_TIME;
+                       if (!strcasecmp (atom + 3, "-balance"))
+                               return MAX_BALANCE;
+                       if (!strcasecmp (atom + 3, "-lease-")) {
+                               if (!strcasecmp(atom + 10, "misbalance"))
+                                       return MAX_LEASE_MISBALANCE;
+                               if (!strcasecmp(atom + 10, "ownership"))
+                                       return MAX_LEASE_OWNERSHIP;
+                               if (!strcasecmp(atom + 10, "time"))
+                                       return MAX_LEASE_TIME;
+                       }
                        if (!strcasecmp (atom + 3, "-transmit-idle"))
                                return MAX_TRANSMIT_IDLE;
                        if (!strcasecmp (atom + 3, "-response-delay"))
@@ -846,6 +854,8 @@ static enum dhcp_token intern (atom, dfv)
                                return MAX_UNACKED_UPDATES;
                }
                if (!strncasecmp (atom + 1, "in-", 3)) {
+                       if (!strcasecmp (atom + 4, "balance"))
+                               return MIN_BALANCE;
                        if (!strcasecmp (atom + 4, "lease-time"))
                                return MIN_LEASE_TIME;
                        if (!strcasecmp (atom + 4, "secs"))
index 952edd29b3c074f1640b606cc2a213088f7c04a4..4735be73e58de60bd389dbfe4f92c30ff76b6f29 100644 (file)
@@ -2666,8 +2666,8 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *,
                                      enum failover_state);
 isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *,
                                               failover_message_t *);
-int dhcp_failover_pool_rebalance (dhcp_failover_state_t *);
-int dhcp_failover_pool_check (struct pool *);
+void dhcp_failover_pool_rebalance (void *);
+void dhcp_failover_pool_check (struct pool *);
 int dhcp_failover_state_pool_check (dhcp_failover_state_t *);
 void dhcp_failover_timeout (void *);
 void dhcp_failover_send_contact (void *);
@@ -2751,6 +2751,7 @@ void dhcp_failover_recover_done (void *);
 void failover_print PROTO ((char *, unsigned *, unsigned, const char *));
 void update_partner PROTO ((struct lease *));
 int load_balance_mine (struct packet *, dhcp_failover_state_t *);
+int peer_wants_lease (struct lease *);
 binding_state_t normal_binding_state_transition_check (struct lease *,
                                                       dhcp_failover_state_t *,
                                                       binding_state_t,
index ea332c4b65e77d64bcaf9e6e2a259e8cab585ab9..5de43130758ebe8bc7731dade7024ce600f51d1d 100644 (file)
@@ -318,7 +318,11 @@ enum dhcp_token {
        SIZE = 622,
        EPOCH = 623,
        DB_TIME_FORMAT = 624,
-       LOCAL = 625
+       LOCAL = 625,
+       MAX_LEASE_MISBALANCE = 626,
+       MAX_LEASE_OWNERSHIP = 627,
+       MAX_BALANCE = 628,
+       MIN_BALANCE = 629
 };
 
 #define is_identifier(x)       ((x) >= FIRST_TOKEN &&  \
index 576d6bb2db0a5998232a1cd9dc015c88c9b3a99e..c35f3249538ece429d14179822e290637bf98910 100644 (file)
@@ -49,6 +49,31 @@ typedef struct {
        u_int8_t *data;
 } failover_option_t;
 
+/* Failover configuration defaults. */
+#ifndef  DEFAULT_MAX_BALANCE_TIME
+# define DEFAULT_MAX_BALANCE_TIME      3600
+#endif
+
+#ifndef  DEFAULT_MIN_BALANCE_TIME
+# define DEFAULT_MIN_BALANCE_TIME      60
+#endif
+
+#ifndef  DEFAULT_MAX_LEASE_MISBALANCE
+# define DEFAULT_MAX_LEASE_MISBALANCE   15
+#endif
+
+#ifndef  DEFAULT_MAX_LEASE_OWNERSHIP
+# define DEFAULT_MAX_LEASE_OWNERSHIP    10
+#endif
+
+#ifndef  DEFAULT_MAX_FLYING_UPDATES
+# define DEFAULT_MAX_FLYING_UPDATES    100
+#endif
+
+#ifndef  DEFAULT_MAX_RESPONSE_DELAY
+# define DEFAULT_MAX_RESPONSE_DELAY    20
+#endif
+
 #define FM_OFFSET(x) (long)(&(((failover_message_t *)0) -> x))
 
 /* All of the below definitions are mandated by draft-ietf-dhc-failover-12.
@@ -313,6 +338,10 @@ typedef struct _dhcp_failover_state {
        u_int8_t *hba;  /* Hash bucket array for load balancing. */
        int load_balance_max_secs;
 
+       unsigned int max_lease_misbalance, max_lease_ownership;
+       u_int32_t max_balance, min_balance;
+       TIME last_balance, sched_balance;
+
        enum service_state service_state;
        const char *nrr;        /* Printable reason why we're in the
                                   not_responding service state (empty
index f4969f9da2c71a5018bf981cb454892351f13cb0..06c74cf557a11192df9115cedbab26b3d3bc582c 100644 (file)
@@ -34,7 +34,7 @@
 
 #ifndef lint
 static char copyright[] =
-"$Id: confpars.c,v 1.156 2006/06/15 17:49:49 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
+"$Id: confpars.c,v 1.157 2006/06/16 19:26:44 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
 #endif /* not lint */
 
 #include "dhcpd.h"
@@ -876,6 +876,22 @@ void parse_failover_peer (cfile, group, type)
                        cp -> port = atoi (val);
                        break;
 
+                     case MAX_LEASE_MISBALANCE:
+                       tp = &peer->max_lease_misbalance;
+                       goto parse_idle;
+
+                     case MAX_LEASE_OWNERSHIP:
+                       tp = &peer->max_lease_ownership;
+                       goto parse_idle;
+
+                     case MAX_BALANCE:
+                       tp = &peer->max_balance;
+                       goto parse_idle;
+
+                     case MIN_BALANCE:
+                       tp = &peer->min_balance;
+                       goto parse_idle;
+
                      case MAX_RESPONSE_DELAY:
                        tp = &cp -> max_response_delay;
                      parse_idle:
@@ -1011,16 +1027,22 @@ void parse_failover_peer (cfile, group, type)
                            "primary failover server must have mclt.");
            }
        }
-       if (!peer -> me.max_flying_updates) {
-               peer -> me.max_flying_updates = 100;
-       }
-       if (!peer -> me.max_response_delay) {
-               peer -> me.max_response_delay = 60;
-       }
 
-       if (type == SHARED_NET_DECL) {
-               group -> shared_network -> failover_peer = peer;
-       }
+       if (!peer->max_lease_misbalance)
+               peer->max_lease_misbalance = DEFAULT_MAX_LEASE_MISBALANCE;
+       if (!peer->max_lease_ownership)
+               peer->max_lease_ownership = DEFAULT_MAX_LEASE_OWNERSHIP;
+       if (!peer->max_balance)
+               peer->max_balance = DEFAULT_MAX_BALANCE_TIME;
+       if (!peer->min_balance)
+               peer->min_balance = DEFAULT_MIN_BALANCE_TIME;
+       if (!peer->me.max_flying_updates)
+               peer->me.max_flying_updates = DEFAULT_MAX_FLYING_UPDATES;
+       if (!peer->me.max_response_delay)
+               peer->me.max_response_delay = DEFAULT_MAX_RESPONSE_DELAY;
+
+       if (type == SHARED_NET_DECL)
+               group->shared_network->failover_peer = peer;
 
        /* Set the initial state. */
        if (peer -> i_am == primary) {
index 0b89dca32b67ae8f15d6ab5526ffd61e4c9a1a7c..8be04bf43b9d2df3c199353ecb97d4ec340b4bc7 100644 (file)
@@ -34,7 +34,7 @@
 
 #ifndef lint
 static char copyright[] =
-"$Id: dhcp.c,v 1.205 2006/06/15 17:52:06 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
+"$Id: dhcp.c,v 1.206 2006/06/16 19:26:45 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
 #endif /* not lint */
 
 #include "dhcpd.h"
@@ -289,16 +289,13 @@ void dhcpdiscover (packet, ms_nulltp)
        if (lease && lease -> pool && lease -> pool -> failover_peer) {
                peer = lease -> pool -> failover_peer;
 
-               /* If the lease is ours to allocate, then allocate it. */
-               if (lease_mine_to_reallocate(lease)) {
-                       if (lease->pool && lease->pool->failover_peer)
-                               dhcp_failover_pool_check(lease->pool);
-
-               /* If the lease is active, it belongs to the client.  This
+               /* If the lease is ours to allocate, then allocate it.
+                * If the lease is active, it belongs to the client.  This
                 * is the right lease, if we are to offer one.  We decide
                 * wether or not to offer later on.
                 */
-               } else if (lease->binding_state == FTS_ACTIVE) {
+               if (lease->binding_state == FTS_ACTIVE ||
+                   lease_mine_to_reallocate(lease)) {
                        ; /* This space intentionally left blank. */
 
                /* Otherwise, we can't let the client have this lease. */
@@ -327,10 +324,6 @@ void dhcpdiscover (packet, ms_nulltp)
                                           packet -> shared_network -> name);
                        return;
                }
-#if defined (FAILOVER_PROTOCOL)
-               if (lease -> pool && lease -> pool -> failover_peer)
-                       dhcp_failover_pool_check (lease -> pool);
-#endif
        }
 
 #if defined (FAILOVER_PROTOCOL)
index 4d766724ba1f50d9b12fc6b0ff6250ea157dcbbf..d886d9b42e8cafa1999cd8092420b961f1087a56 100644 (file)
@@ -28,7 +28,7 @@
 .\" see ``http://www.vix.com''.   To learn more about Nominum, Inc., see
 .\" ``http://www.nominum.com''.
 .\"
-.\" $Id: dhcpd.conf.5,v 1.72 2006/06/15 17:49:49 dhankins Exp $
+.\" $Id: dhcpd.conf.5,v 1.73 2006/06/16 19:26:45 dhankins Exp $
 .\"
 .TH dhcpd.conf 5
 .SH NAME
@@ -613,6 +613,46 @@ the port number declared in the \fBport\fR statement.
 .RE
 .PP
 The 
+.I max-lease-misbalance
+statement
+.RS 0.25i
+.PP
+.B max-lease-misbalance \fIinteger\fR\fB;\fR
+.PP
+The \fBmax-lease-misbalance\fR statement tells the DHCP server what
+percentage of total free leases (defined as the total number of
+leases in either the FREE or BACKUP states) a peer is allowed to own
+before a rebalance check is made.  Configuring higher values causes
+the server to rebalance less frequently, but permits a larger misbalance
+between the FREE and BACKUP lease pools.  Configuring a lower value
+causes the server to rebalance more frequently, but keeps the pools more
+balanced.  ISC DHCP servers no longer send POOLREQ messages unless the
+misbalance is at least twice this percentage in the peer's favor.  Valid
+values are between 0 and 100.  The default is 15.
+.RE
+.PP
+The
+.I max-lease-ownership
+statement
+.RS 0.25i
+.PP
+.B max-lease-ownership \fIinteger\fR\fB;\fR
+.PP
+The \fBmax-lease-ownership\fR statement tells the DHCP server what
+percentage of total free leases either it or its peer is normally allowed to
+own in excess of balance for the purpose of MAC Address Affinity.  When a
+server undergoes a lease rebalancing operation, it first tries to move as
+many leases as it can to the peer whose previous client was Load-Balanced to
+that peer (as governed by the Load Balance Algorithm, see the \fBsplit\fR
+configuration value).  The \fBmax-lease-ownership\fR value determines the
+maximum percentage of leases either server will hold before giving its
+peer the oldest leases (regardless of the previous client's place in the
+Load Balance algorithm).  Valid values are between 0 and 100, and should
+probably be less than the \fBmax-lease-misbalance\fR value.  Larger values
+will allow servers to retain leases to reallocate to returning clients,
+smaller values promote pool balance.  The default is 10.
+.PP
+The
 .I max-response-delay
 statement
 .RS 0.25i
@@ -629,11 +669,6 @@ constantly making and breaking connections.   This parameter must be
 specified.
 .RE
 .PP
-The 
-.I max-unacked-updates
-statement
-.RS 0.25i
-.PP
 .B max-unacked-updates \fIcount\fR\fB;\fR
 .PP
 The \fBmax-unacked-updates\fR statement tells the DHCP server how
@@ -643,6 +678,33 @@ to say what a good value for this is, but 10 seems to work.   This
 parameter must be specified.
 .RE
 .PP
+The
+.I min-balance
+and
+.I max-balance
+statements
+.RS 0.25i
+.PP
+.B min-balance \fIseconds\fR\fB;\fR
+.B max-balance \fIseconds\fR\fB;\fR
+.PP
+The DHCP Server schedules pool rebalance events at a time between these
+two values, estimated to be when the \fBmax-lease-misbalance\fR percent
+of leases have been allocated by its peer.  This estimate is reached from
+however many seconds have elapsed since the oldest lease in the failover
+peer's pool expired.
+.PP
+The \fBmin-balance\fR value defaults to 60, one minute, and the
+\fBmax-balance\fR value defaults to 3600, one hour.
+.PP
+Lease rebalancing events can be CPU intensive, particularly on installations
+where failover peers may have large numbers of pools and addresses to
+examine, so these parameters should be used to keep the estimation of
+the need for pool rebalance sane...not so long that you are in danger of
+exhausting your pool, not so short that your server is constantly
+rebalancing.
+.RE
+.PP
 The 
 .I mclt
 statement
@@ -672,10 +734,14 @@ statement
 The split statement specifies the split between the primary and
 secondary for the purposes of load balancing.   Whenever a client
 makes a DHCP request, the DHCP server runs a hash on the client
-identification.   If the hash comes out to less than the split value,
-the primary answers.   If it comes out to equal to or more than the
-split, the secondary answers.   The only meaningful value is 128, and can
-only be configured on the primary.
+identification, resulting in a value from 0 to 255.  This is used as
+an index into a 256 bit field.  If the bit at that index is set,
+the primary is responsible.  If the bit at that index is not set,
+the secondary is responsible.  The \fBsplit\fR value determines
+how many of the leading bits are set to one.  So, in practice, higher
+split values will cause the primary to serve more clients than the
+secondary.  Lower split values, the converse.  Legal values are between
+0 and 255, of which the most reasonable is 128.
 .RE
 .PP
 The 
@@ -695,10 +761,27 @@ for such fine-grained control, however.   An example hba statement:
       00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00;
 .fi
 .PP
-This is equivalent to a \fBsplit 128;\fR statement.  You must only have
-\fBsplit\fR or \fBhba\fR defined, never both.  For most cases, the
-fine-grained control that \fBhba\fR offers isn't necessary, and \fBsplit\fR
-should be used.  As such, the use of \fBhba\fR is deprecated.
+This is equivalent to a \fBsplit 128;\fR statement, and identical.  The
+following two examples are also equivalent to a \fBsplit\fR of 128, but 
+are not identical:
+.PP
+.nf
+  hba aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:
+      aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa:aa;
+
+  hba 55:55:55:55:55:55:55:55:55:55:55:55:55:55:55:55:
+      55:55:55:55:55:55:55:55:55:55:55:55:55:55:55:55;
+.fi
+.PP
+They are equivalent, because half the bits are set to 0, half are set to
+1 (0xa and 0x5 are 1010 and 0101 binary respectively) and consequently this
+would roughly divide the clients equally between the servers.  They are not
+identical, because the actual peers this would load balance to each server
+are different for each example.
+.PP
+You must only have \fBsplit\fR or \fBhba\fR defined, never both.  For most
+cases, the fine-grained control that \fBhba\fR offers isn't necessary, and
+\fBsplit\fR should be used.
 .RE
 .PP
 The 
index 7726a844730cfa35d3c1b3544fa36f4299ebf4f4..5dbd6064baa0718b13a120aa70b164644868f88f 100644 (file)
@@ -34,7 +34,7 @@
 
 #ifndef lint
 static char copyright[] =
-"$Id: failover.c,v 1.61 2006/05/04 21:14:21 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
+"$Id: failover.c,v 1.62 2006/06/16 19:26:45 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
 #endif /* not lint */
 
 #include "dhcpd.h"
@@ -53,6 +53,12 @@ static isc_result_t failover_message_reference (failover_message_t **,
 static isc_result_t failover_message_dereference (failover_message_t **,
                                                  const char *file, int line);
 
+static void dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state);
+static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state);
+static INLINE int secondary_not_hoarding(dhcp_failover_state_t *state,
+                                        struct pool *p);
+
+
 void dhcp_failover_startup ()
 {
        dhcp_failover_state_t *state;
@@ -1383,7 +1389,7 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *o,
                        dhcp_failover_process_update_done (state,
                                                           link -> imsg);
                } else if (link -> imsg -> type == FTM_POOLREQ) {
-                       dhcp_failover_pool_rebalance (state);
+                       dhcp_failover_pool_reqbalance(state);
                } else if (link -> imsg -> type == FTM_POOLRESP) {
                        log_info ("pool response: %ld leases",
                                  (unsigned long)
@@ -2211,10 +2217,40 @@ isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state,
        return ISC_R_SUCCESS;
 }
 
-int dhcp_failover_pool_rebalance (dhcp_failover_state_t *state)
+/* Entry from timer. */
+void dhcp_failover_pool_rebalance(void *failover_state)
 {
-       int lts;
+       dhcp_failover_state_t *state;
+
+       state = (dhcp_failover_state_t *)failover_state;
+
+       if (dhcp_failover_pool_dobalance(state))
+               dhcp_failover_send_updates(state);
+}
+
+/* Entry from POOLREQ. */
+static void dhcp_failover_pool_reqbalance(dhcp_failover_state_t *state)
+{
+       int queued;
+
+       queued = dhcp_failover_pool_dobalance(state);
+
+       dhcp_failover_send_poolresp(state, queued);
+
+       if (queued)
+               dhcp_failover_send_updates(state);
+       else
+               log_info("peer %s: Got POOLREQ, answering negatively!  "
+                        "Peer may be out of leases or database inconsistent.",
+                        state->name);
+}
+
+/* Do the meat of the work common to all forms of pool rebalance. */
+static int dhcp_failover_pool_dobalance(dhcp_failover_state_t *state)
+{
+       int lts, total, thresh, hold, pass;
        int leases_queued = 0;
+       int reqsent = 0;
        struct lease *lp = (struct lease *)0;
        struct lease *next = (struct lease *)0;
        struct shared_network *s;
@@ -2223,14 +2259,17 @@ int dhcp_failover_pool_rebalance (dhcp_failover_state_t *state)
        binding_state_t peer_lease_state;
        binding_state_t my_lease_state;
        struct lease **lq;
-       int tenper;
 
-       if (state -> me.state != normal || state -> i_am == secondary)
+       if (state -> me.state != normal)
                return 0;
 
-       for (s = shared_networks; s; s = s -> next) {
-           for (p = s -> pools; p; p = p -> next) {
-               if (p -> failover_peer != state)
+       state->last_balance = cur_time;
+       cancel_timeout(dhcp_failover_pool_rebalance, state);
+       state->sched_balance = 0;
+
+       for (s = shared_networks ; s ; s = s->next) {
+           for (p = s->pools ; p ; p = p->next) {
+               if (p->failover_peer != state)
                    continue;
 
                /* Right now we're giving the peer half of the free leases.
@@ -2239,125 +2278,205 @@ int dhcp_failover_pool_rebalance (dhcp_failover_state_t *state)
                   of leases the peer has, will be how many more leases we
                   have than the peer has.   So if we send half that number
                   to the peer, we should be even. */
-               if (p -> failover_peer -> i_am == primary) {
-                       lts = (p -> free_leases - p -> backup_leases) / 2;
+               if (p->failover_peer->i_am == primary) {
+                       lts = (p->free_leases - p->backup_leases) / 2;
                        peer_lease_state = FTS_BACKUP;
                        my_lease_state = FTS_FREE;
-                       lq = &p -> free;
+                       lq = &p->free;
                } else {
-                       lts = (p -> backup_leases - p -> free_leases) / 2;
+                       lts = (p->backup_leases - p->free_leases) / 2;
                        peer_lease_state = FTS_FREE;
                        my_lease_state = FTS_BACKUP;
-                       lq = &p -> backup;
+                       lq = &p->backup;
+               }
+
+               log_info ("pool %lx %s  total %d  free %d  backup %d  lts %d",
+                       (unsigned long)p,
+                       (p->shared_network ?
+                        p->shared_network->name : ""), p->lease_count,
+                       p->free_leases, p->backup_leases, lts);
+
+               total = p->backup_leases + p->free_leases;
+
+               thresh = ((total * state->max_lease_misbalance) + 50) / 100;
+               hold = ((total * state->max_lease_ownership) + 50) / 100;
+
+               /* If lts is in the negatives (we need leases) more than
+                * negative double the thresh%, panic and send poolreq to
+                * hopefully wake up the peer.
+                */
+               if (!reqsent && (lts < (thresh * -2))) {
+                       dhcp_failover_send_poolreq(state);
+                       reqsent = 1;
                }
 
-               tenper = (p -> backup_leases + p -> free_leases) / 10;
-               if (tenper == 0)
-                       tenper = 1;
-               if (lts > tenper) {
-                   log_info ("pool %lx %s  total %d  free %d  %s %d  lts %d",
-                         (unsigned long)p,
-                         (p -> shared_network ?
-                          p -> shared_network -> name : ""), p -> lease_count,
-                         p -> free_leases, "backup", p -> backup_leases, lts);
+               /* Do not go through the process unless at least we have
+                * more than thresh% more leases than the peer.
+                */
+               if (lts <= thresh) {
+                       log_info("pool %lx %s: lts <= max-lease-misbalance "
+                                "(%d), pool rebalance event skipped.",
+                                (unsigned long)p,
+                                (p->shared_network ?
+                                 p->shared_network->name : ""), thresh);
+
+                       /* Recalculate next rebalance event timer. */
+                       dhcp_failover_pool_check(p);
+                       continue;
+               }
 
-                   lease_reference (&lp, *lq, MDL);
+               /* In the first pass, try to allocate leases to the
+                * peer which it would normally be responsible for (if
+                * the lease has a hardware address or client-identifier,
+                * and the load-balance-algorithm chooses the peer to
+                * answer that address), up to a hold% excess in the peer's
+                * favor.  In the second pass, just send the oldest (first
+                * on the list) leases up to a hold% excess in our favor.
+                *
+                * This could make for additional pool rebalance
+                * events, but preserving MAC possession should be
+                * worth it.
+                */
+               pass = 0;
+               lease_reference(&lp, *lq, MDL);
 
-                   while (lp && lts) {
-                       /* Remember the next lease in the list. */
+               /* hold may be zero (consider the case where there are 2
+                * leases, both on one server), therefore use >=.
+                */
+               while (lp && (lts >= (pass ? hold : -hold))) {
                        if (next)
-                           lease_dereference (&next, MDL);
-                       if (lp -> next)
-                           lease_reference (&next, lp -> next, MDL);
-
-                       --lts;
-                       ++leases_queued;
-                       lp -> next_binding_state = peer_lease_state;
-                       lp -> tstp = cur_time;
-                       lp -> starts = cur_time;
-
-                       if (!supersede_lease (lp, (struct lease *)0, 0, 1, 0)
-                           || !write_lease (lp))
-                       {
-                           log_info ("can't commit lease %s on giveaway",
-                                     piaddr (lp -> ip_addr));
+                           lease_dereference(&next, MDL);
+                       if (lp->next)
+                           lease_reference(&next, lp->next, MDL);
+
+                       if (pass || peer_wants_lease(lp)) {
+                           --lts;
+                           ++leases_queued;
+                           lp->next_binding_state = peer_lease_state;
+                           lp->tstp = cur_time;
+                           lp->starts = cur_time;
+
+                           if (!supersede_lease(lp, NULL, 0, 1, 0) ||
+                               !write_lease(lp))
+                                   log_error("can't commit lease %s on "
+                                             "giveaway", piaddr(lp->ip_addr));
                        }
 
-                       lease_dereference (&lp, MDL);
+                       lease_dereference(&lp, MDL);
                        if (next)
-                               lease_reference (&lp, next, MDL);
-                   }
-                   if (next)
-                       lease_dereference (&next, MDL);
-                   if (lp)
-                       lease_dereference (&lp, MDL);
-
-               }
-               if (lts > 1) {
-                       log_info ("lease imbalance - lts = %d", lts);
+                               lease_reference(&lp, next, MDL);
+                       else if (!pass) {
+                               pass = 1;
+                               lease_reference(&lp, *lq, MDL);
+                       }
                }
+
+               if (next)
+                       lease_dereference(&next, MDL);
+               if (lp)
+                       lease_dereference(&lp, MDL);
+
+               if (lts > thresh)
+                       log_error("lease imbalance persists - lts = %d", lts);
+               /* Recalculate next rebalance event timer. */
+               dhcp_failover_pool_check(p);
            }
        }
-       commit_leases();
-       dhcp_failover_send_poolresp (state, leases_queued);
-       dhcp_failover_send_updates (state);
+
+       if (leases_queued)
+               commit_leases();
+
        return leases_queued;
 }
 
-int dhcp_failover_pool_check (struct pool *pool)
+/* dhcp_failover_pool_check: Called whenever FREE or BACKUP leases change
+ * states, on both servers.  Check the scheduled time to rebalance the pool
+ * and lower it if applicable.
+ */
+void
+dhcp_failover_pool_check(struct pool *pool)
 {
-       int lts;
-       struct lease *lp;
-       int tenper;
+       dhcp_failover_state_t *peer;
+       TIME est1, est2;
 
-       if (!pool -> failover_peer ||
-           pool -> failover_peer -> me.state != normal)
-               return 0;
+       peer = pool->failover_peer;
+
+       if(!peer || peer->me.state != normal)
+               return;
 
-       if (pool -> failover_peer -> i_am == primary)
-               lts = (pool -> backup_leases - pool -> free_leases) / 2;
+       /* Estimate the time left until lease exhaustion.
+        * The first lease on the backup or free lists is also the oldest
+        * lease.  It is reasonable to guess that it will take at least
+        * as much time for a pool to run out of leases, as the present
+        * age of the oldest lease (seconds since it expired).
+        *
+        * Note that this isn't so sane of an assumption if the oldest
+        * lease is a virgin (ends = 0), we wind up sending this against
+        * the max_balance bounds check.
+        */
+       if(pool->free && pool->free->ends < cur_time)
+               est1 = cur_time - pool->free->ends;
        else
-               lts = (pool -> free_leases - pool -> backup_leases) / 2;
-
-       log_info ("pool %lx %s total %d  free %d  backup %d  lts %d",
-                 (unsigned long)pool,
-                 pool -> shared_network ? pool -> shared_network -> name : "",
-                 pool -> lease_count,
-                 pool -> free_leases, pool -> backup_leases, lts);
-
-       tenper = (pool -> backup_leases + pool -> free_leases) / 10;
-       if (tenper == 0)
-               tenper = 1;
-       if (lts > tenper) {
-               /* XXX What about multiple pools? */
-               if (pool -> failover_peer -> i_am == secondary) {
-                       /* Ask the primary to send us leases. */
-                       dhcp_failover_send_poolreq (pool -> failover_peer);
-                       return 1;
-               } else {
-                       /* Figure out how many leases to skip on the backup
-                          list.   We skip the earliest leases on the list
-                          to reduce the chance of trying to steal a lease
-                          that the secondary is about to allocate. */
-                       int i = pool -> backup_leases - lts;
-                       log_info ("Taking %d leases from secondary.", lts);
-                       for (lp = pool -> backup; lp; lp = lp -> next) {
-                               /* Skip to the last leases on the free
-                                  list, because they are less likely
-                                  to already have been allocated. */
-                               if (i)
-                                       --i;
-                               else {
-                                       lp -> desired_binding_state = FTS_FREE;
-                                       dhcp_failover_queue_update (lp, 1);
-                                       --lts;
-                               }
-                       }
-                       if (lts)
-                               log_info ("failed to take %d leases.", lts);
-               }
+               est1 = 0;
+
+       if(pool->backup && pool->backup->ends < cur_time)
+               est2 = cur_time - pool->backup->ends;
+       else
+               est2 = 0;
+
+       /* We don't want to schedule rebalance for when we think we'll run
+        * out of leases, we want to schedule the rebalance for when we think
+        * the disparity will be 'large enough' to warrant action.
+        */
+       est1 = ((est1 * peer->max_lease_misbalance) + 50) / 100;
+       est2 = ((est2 * peer->max_lease_misbalance) + 50) / 100;
+
+       /* Guess when the local system will begin issuing POOLREQ panic
+        * attacks because "max_lease_misbalance*2" has been exceeded.
+        */
+       if(peer->i_am == primary)
+               est1 *= 2;
+       else
+               est2 *= 2;
+
+       /* Select the smallest time. */
+       if(est1 > est2)
+               est1 = est2;
+
+       /* Bounded by the maximum configured value. */
+       if(est1 > peer->max_balance)
+               est1 = peer->max_balance;
+
+       /* Project this time into the future. */
+       est1 += cur_time;
+
+       /* Do not move the time down under the minimum. */
+       est2 = peer->last_balance + peer->min_balance;
+       if(peer->last_balance && (est1 < est2))
+               est1 = est2;
+
+       /* Do not move the time forward, or reset to the same time. */
+       if(peer->sched_balance) {
+               if (est1 >= peer->sched_balance)
+                       return;
+
+               /* We are about to schedule the time down, cancel the
+                * current timeout.
+                */
+               cancel_timeout(dhcp_failover_pool_rebalance, peer);
        }
-       return 0;
+
+       /* The time is different, and lower, use it. */
+       peer->sched_balance = est1;
+
+#if defined(DEBUG_FAILOVER_TIMING)
+       log_info("add_timeout +%d dhcp_failover_pool_rebalance",
+                est1 - cur_time);
+#endif
+       add_timeout(est1, dhcp_failover_pool_rebalance, peer,
+                       (tvref_t)dhcp_failover_state_reference,
+                       (tvunref_t)dhcp_failover_state_dereference);
 }
 
 int dhcp_failover_state_pool_check (dhcp_failover_state_t *state)
@@ -2370,9 +2489,7 @@ int dhcp_failover_state_pool_check (dhcp_failover_state_t *state)
                for (p = s -> pools; p; p = p -> next) {
                        if (p -> failover_peer != state)
                                continue;
-                       /* Only need to request rebalance on one pool. */
-                       if (dhcp_failover_pool_check (p))
-                               return 1;
+                       dhcp_failover_pool_check (p);
                }
        }
        return 0;
@@ -4600,6 +4717,7 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
        int reason = FTR_MISC_REJECT;
        const char *message;
        int new_binding_state;
+       int send_to_backup = 0;
 
        ia.len = sizeof msg -> assigned_addr;
        memcpy (ia.iabuf, &msg -> assigned_addr, ia.len);
@@ -4784,8 +4902,17 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
                    new_binding_state == FTS_RELEASED ||
                    new_binding_state == FTS_RESET) {
                        lt -> next_binding_state = FTS_FREE;
-               } else
+
+                       /* Mac address affinity.  Assign the lease to
+                        * BACKUP state if we are the primary and the
+                        * peer is more likely to reallocate this lease
+                        * to a returning client.
+                        */
+                       if (state->i_am == primary)
+                               send_to_backup = peer_wants_lease(lt);
+               } else {
                        lt -> next_binding_state = new_binding_state;
+               }
                msg -> binding_status = lt -> next_binding_state;
        }
 
@@ -4795,10 +4922,27 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
                message = "database update failed";
              bad:
                dhcp_failover_send_bind_ack (state, msg, reason, message);
+               goto out;
        } else {
                dhcp_failover_queue_ack (state, msg);
        }
 
+       /* If it is probably wise, assign lease to backup state if the peer
+        * is not already hoarding leases.
+        */
+       if (send_to_backup && secondary_not_hoarding(state, lt->pool)) {
+               lt->next_binding_state = FTS_BACKUP;
+               lt->tstp = cur_time;
+               lt->starts = cur_time;
+
+               if (!supersede_lease(lt, NULL, 0, 1, 0) ||
+                   !write_lease(lt))
+                       log_error("can't commit lease %s for mac addr "
+                                 "affinity", piaddr(lt->ip_addr));
+
+               dhcp_failover_send_updates(state);
+       }
+
       out:
        if (lt)
                lease_dereference (&lt, MDL);
@@ -4808,6 +4952,34 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
        return ISC_R_SUCCESS;
 }
 
+/* Decide whether MAC address affinity may hand the secondary another lease.
+ *
+ * Kept as its own helper because the arithmetic is too involved to read
+ * comfortably inside an if statement.
+ *
+ * Returns: True if the secondary is allowed to get more leases based upon
+ * MAC address affinity.  False otherwise.
+ */
+static INLINE int
+secondary_not_hoarding(dhcp_failover_state_t *state, struct pool *p) {
+       /* Sum of the pool's free and backup lease counts. */
+       int available = p->free_leases + p->backup_leases;
+
+       /* How many leases one side or the other is allowed to "hold":
+        * max_lease_ownership percent of the sum, rounded to nearest.
+        */
+       int allowance = ((available * state->max_lease_ownership) + 50) / 100;
+
+       /* How many leases we would send to even the counts out; negative
+        * when the secondary would be sending leases to us instead.
+        */
+       int to_send = (p->free_leases - p->backup_leases) / 2;
+
+       /* The peer is hoarding once it would have to give back at least
+        * its whole allowance (the negative allowance).
+        */
+       if (to_send <= -allowance)
+               return 0;
+
+       return 1;
+}
+
 isc_result_t dhcp_failover_process_bind_ack (dhcp_failover_state_t *state,
                                             failover_message_t *msg)
 {
@@ -4864,6 +5036,26 @@ isc_result_t dhcp_failover_process_bind_ack (dhcp_failover_state_t *state,
                        lease->next_binding_state = FTS_FREE;
                supersede_lease(lease, (struct lease *)0, 0, 0, 0);
                write_lease(lease);
+
+               /* Lease has returned to FREE state from the
+                * transitional states.  If the lease 'belongs'
+                * to a client that would be served by the
+                * peer, process a binding update now to send
+                * the lease to backup state.
+                */
+               if (state->i_am == primary &&
+                   peer_wants_lease(lease)) {
+                       lease->next_binding_state = FTS_BACKUP;
+                       lease->tstp = cur_time;
+                       lease->starts = cur_time;
+
+                       if (!supersede_lease(lease, NULL, 0, 1, 0) ||
+                           !write_lease(lease))
+                               log_error("can't commit lease %s for "
+                                         "client affinity",
+                                         piaddr(lease->ip_addr));
+               }
+
                if (state->me.state == normal)
                        commit_leases ();
        } else {
@@ -5242,6 +5434,40 @@ int load_balance_mine (struct packet *packet, dhcp_failover_state_t *state)
                return !hm;
 }
 
+/* The inverse of load_balance_mine ("load balance theirs").  We can't
+ * use the regular load_balance_mine() and invert it because of the case
+ * where there might not be an HBA, and we want to indicate false here
+ * in this case only.
+ *
+ * The value hashed is the lease's client identifier (uid) when it has
+ * one, otherwise its hardware address.
+ *
+ * Returns: nonzero if the lease's client falls in the peer's share of the
+ * HBA; zero if it falls in ours, or if the lease cannot be categorized
+ * (no pool, no failover peer, no HBA, or no identifier to hash).
+ */
+int
+peer_wants_lease(struct lease *lp)
+{
+       dhcp_failover_state_t *state;
+       unsigned char hbaix;
+       int hm;
+
+       if (!lp->pool)
+               return 0;
+
+       state = lp->pool->failover_peer;
+
+       if (!state || !state->hba)
+               return 0;
+
+       if (lp->uid_len)
+               hbaix = loadb_p_hash (lp->uid, lp->uid_len);
+       else if (lp->hardware_addr.hlen > 1)
+               /* Skip the first byte, which is the hardware type.  It is
+                * not part of the packet's chaddr field, which is what the
+                * load balancing check on incoming packets hashes, so
+                * including it here would put the same client in a
+                * different bucket.
+                */
+               hbaix = loadb_p_hash (lp->hardware_addr.hbuf + 1,
+                                     lp->hardware_addr.hlen - 1);
+       else /* Nothing to hash; impossible to categorize. */
+               return 0;
+
+       /* hbaix picks one bit out of the HBA bitmap: the high five bits
+        * select the byte, the low three bits select the bit within it.
+        */
+       hm = state->hba[(hbaix >> 3) & 0x1F] & (1 << (hbaix & 0x07));
+
+       /* A set bit marks the primary's share, so the peer wants the lease
+        * when the bit is clear and we are the primary, or set and we are
+        * the secondary.
+        */
+       if (state->i_am == primary)
+               return !hm;
+       else
+               return hm;
+}
+
 /* This deals with what to do with bind updates when
    we're in the normal state 
 
index 3d10960524178a301a34a369250acd9d8b880f68..86b060a1ec45dad4b651138a2041aaf7b388e089 100644 (file)
@@ -34,7 +34,7 @@
 
 #ifndef lint
 static char copyright[] =
-"$Id: mdb.c,v 1.80 2006/06/09 15:51:02 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
+"$Id: mdb.c,v 1.81 2006/06/16 19:26:45 dhankins Exp $ Copyright (c) 2004-2006 Internet Systems Consortium.  All rights reserved.\n";
 #endif /* not lint */
 
 #include "dhcpd.h"
@@ -875,8 +875,9 @@ int supersede_lease (comp, lease, commit, propogate, pimmediate)
        int enter_hwaddr = 0;
        struct lease *lp, **lq, *prev;
        TIME lp_next_state;
-
 #if defined (FAILOVER_PROTOCOL)
+       int do_pool_check = 0;
+
        /* We must commit leases before sending updates regarding them
           to failover peers.  It is, therefore, an error to set pimmediate
           and not commit. */
@@ -1070,6 +1071,10 @@ int supersede_lease (comp, lease, commit, propogate, pimmediate)
                lq = &comp -> pool -> free;
                if (!(comp->flags & RESERVED_LEASE))
                        comp->pool->free_leases--;
+
+#if defined(FAILOVER_PROTOCOL)
+               do_pool_check = 1;
+#endif
                break;
 
              case FTS_ACTIVE:
@@ -1090,6 +1095,10 @@ int supersede_lease (comp, lease, commit, propogate, pimmediate)
                lq = &comp -> pool -> backup;
                if (!(comp->flags & RESERVED_LEASE))
                        comp->pool->backup_leases--;
+
+#if defined(FAILOVER_PROTOCOL)
+               do_pool_check = 1;
+#endif
                break;
 
              default:
@@ -1180,6 +1189,8 @@ int supersede_lease (comp, lease, commit, propogate, pimmediate)
                if (!dhcp_failover_queue_update (comp, pimmediate))
                        return 0;
        }
+       if (do_pool_check && comp->pool->failover_peer)
+               dhcp_failover_pool_check(comp->pool);
 #endif
 
        /* If the current binding state has already expired, do an