]> git.ipfire.org Git - thirdparty/dhcp.git/commitdiff
- Add a bunch of support for all the funny recovery states.
authorTed Lemon <source@isc.org>
Thu, 24 Aug 2000 18:41:42 +0000 (18:41 +0000)
committerTed Lemon <source@isc.org>
Thu, 24 Aug 2000 18:41:42 +0000 (18:41 +0000)
- Clean up existing state machine support.

server/failover.c

index a321bed41244539fc85e0c79df78537d856466af..aea1966d23d6a8dd8496f53b60226f19a3de8db3 100644 (file)
@@ -43,7 +43,7 @@
 
 #ifndef lint
 static char copyright[] =
-"$Id: failover.c,v 1.21 2000/06/29 20:02:50 mellon Exp $ Copyright (c) 1999-2000 The Internet Software Consortium.  All rights reserved.\n";
+"$Id: failover.c,v 1.22 2000/08/24 18:41:42 mellon Exp $ Copyright (c) 1999-2000 The Internet Software Consortium.  All rights reserved.\n";
 #endif /* not lint */
 
 #include "dhcpd.h"
@@ -1084,7 +1084,8 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *o,
                                omapi_disconnect (link -> outer, 1);
                                return ISC_R_SUCCESS;
                        }
-                       dhcp_failover_state_transition (state, "connect");
+                       /* dhcp_failover_state_transition (state,
+                                                          "connect"); */
                } else if (link -> imsg -> type == FTM_CONNECTACK) {
                    const char *errmsg;
                    int reason;
@@ -1169,6 +1170,15 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *o,
                                                           link -> imsg);
                } else if (link -> imsg -> type == FTM_BNDACK) {
                        dhcp_failover_process_bind_ack (state, link -> imsg);
+               } else if (link -> imsg -> type == FTM_UPDREQ) {
+                       dhcp_failover_process_update_request (state,
+                                                             link -> imsg);
+               } else if (link -> imsg -> type == FTM_UPDREQALL) {
+                       dhcp_failover_process_update_request_all
+                               (state, link -> imsg);
+               } else if (link -> imsg -> type == FTM_UPDDONE) {
+                       dhcp_failover_process_update_done (state,
+                                                          link -> imsg);
                } else if (link -> imsg -> type == FTM_POOLREQ) {
                        dhcp_failover_pool_rebalance (state);
                } else if (link -> imsg -> type == FTM_POOLRESP) {
@@ -1176,6 +1186,11 @@ isc_result_t dhcp_failover_state_signal (omapi_object_t *o,
                                  (unsigned long)
                                  link -> imsg -> addresses_transferred);
                }
+               if (state &&
+                   link -> imsg -> server_state !=
+                   state -> my_state)
+                       dhcp_failover_peer_state_changed (state,
+                                                         link -> imsg);
        }
 
        /* Add a timeout so that if the partner doesn't send another message
@@ -1208,7 +1223,7 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
                cancel_timeout (dhcp_failover_timeout, state);
 
                switch (state -> my_state) {
-                     case potential_conflict_nic:
+                     case resolution_interrupted:
                      case partner_down:
                      case communications_interrupted:
                        /* Already in the right state? */
@@ -1216,7 +1231,7 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
                
                      case potential_conflict:
                        return dhcp_failover_set_state
-                               (state, potential_conflict_nic);
+                               (state, resolution_interrupted);
                                
                      case recover:
                        /* XXX I don't think it makes sense to make a
@@ -1232,9 +1247,10 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
                      case unknown_state:
                      default:
                        return dhcp_failover_set_state
-                               (state, potential_conflict_nic);
+                               (state, resolution_interrupted);
 
                }
+#if 0 /* This is now handled in the partner state transition code. */
        } else if (!strcmp (name, "connect")) {
                switch (state -> my_state) {
                      case communications_interrupted:
@@ -1243,7 +1259,7 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
                      case partner_down:
                        return dhcp_failover_set_state (state, recover);
                
-                     case potential_conflict_nic:
+                     case resolution_interrupted:
                        return dhcp_failover_set_state (state,
                                                        potential_conflict);
 
@@ -1261,11 +1277,23 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
                        return dhcp_failover_set_state (state,
                                                        potential_conflict);
                }
+#endif
        } else if (!strcmp (name, "startup")) {
+               state -> service_state = service_startup;
+               state -> nrr = " (startup)";
+               log_info ("peer %s service state: not responding for 15s",
+                         state -> name);
+               add_timeout (cur_time + 15,
+                            dhcp_failover_startup_timeout,
+                            state,
+                            (tvref_t)omapi_object_reference,
+                            (tvunref_t)
+                            omapi_object_dereference);
+       } else if (!strcmp (name, "connect-timeout")) {
                switch (state -> my_state) {
                      case communications_interrupted:
                      case partner_down:
-                     case potential_conflict_nic:
+                     case resolution_interrupted:
                        return ISC_R_SUCCESS;
 
                      case normal:
@@ -1275,7 +1303,7 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
 
                      case potential_conflict:
                        return dhcp_failover_set_state
-                               (state, potential_conflict_nic);
+                               (state, resolution_interrupted);
 
                      case unknown_state:
                        return dhcp_failover_set_state
@@ -1283,12 +1311,87 @@ isc_result_t dhcp_failover_state_transition (dhcp_failover_state_t *state,
 
                      default:
                        return dhcp_failover_set_state
-                               (state, potential_conflict_nic);
+                               (state, resolution_interrupted);
                }
        }
        return ISC_R_INVALIDARG;
 }
 
+isc_result_t dhcp_failover_set_service_state (dhcp_failover_state_t *state)
+{
+       switch (state -> my_state) {
+             case unknown_state:
+               state -> service_state = not_responding;
+               state -> nrr = " (my state unknown)";
+               break;
+
+             case partner_down:
+               state -> service_state = service_partner_down;
+               state -> nrr = "";
+               break;
+
+             case normal:
+               state -> service_state = cooperating;
+               state -> nrr = "";
+               break;
+
+             case communications_interrupted:
+               state -> service_state = not_cooperating;
+               state -> nrr = "";
+               break;
+
+             case resolution_interrupted:
+             case potential_conflict:
+               state -> service_state = not_responding;
+               state -> nrr = " (resolving conflicts)";
+               break;
+
+             case recover:
+               state -> service_state = not_responding;
+               state -> nrr = " (recovering)";
+               break;
+
+             case shut_down:
+               state -> service_state = not_responding;
+               state -> nrr = " (shut down)";
+               break;
+
+             case paused:
+               state -> service_state = not_responding;
+               state -> nrr = " (paused)";
+               break;
+
+             case recover_done:
+               state -> service_state = not_responding;
+               state -> nrr = " (recover done)";
+               break;
+       }
+
+       /* Some peer states can require us not to respond, even if our
+          state doesn't. */
+       /* XXX hm.   I suspect this isn't true anymore. */
+       if (state -> service_state != not_responding) {
+               switch (state -> partner_state) {
+                     case partner_down:
+                     case recover:
+                       state -> service_state = not_responding;
+                       state -> nrr = " (recovering)";
+                       break;
+
+                     case potential_conflict:
+                       state -> service_state = not_responding;
+                       state -> nrr = " (resolving conflicts)";
+                       break;
+
+                       /* Other peer states don't affect our behaviour. */
+                     default:
+                       break;
+               }
+       }
+
+       return ISC_R_SUCCESS;
+}
+
 isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
                                      enum failover_state new_state)
 {
@@ -1298,8 +1401,6 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
     /* First make the transition out of the current state. */
     switch (state -> my_state) {
       case normal:
-      case recover:
-      case potential_conflict:
        /* Any updates that haven't been acked yet, we have to
           resend, just in case. */
        if (state -> ack_queue_tail) {
@@ -1329,9 +1430,11 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
        cancel_timeout (dhcp_failover_keepalive, state);
        break;
 
+      case recover:
+      case potential_conflict:
       case partner_down:
       case communications_interrupted:
-      case potential_conflict_nic:
+      case resolution_interrupted:
       default:
        break;
     }
@@ -1353,15 +1456,13 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
            return ISC_R_IOERROR;
     }
 
-    log_info ("failover peer %s moves from %s to %s",
+    log_info ("failover peer %s: I move from %s to %s",
              state -> name, dhcp_failover_state_name_print (saved_state),
              dhcp_failover_state_name_print (state -> my_state));
     
     switch (new_state)
     {
       case normal:
-      case recover:
-      case potential_conflict:
        /* If we go into communications-interrupted and then back into
           potential-conflict, do we need to start over, or just continue
           with the reconciliation process?    This came up at one of the
@@ -1372,13 +1473,286 @@ isc_result_t dhcp_failover_set_state (dhcp_failover_state_t *state,
        }
        dhcp_failover_state_pool_check (state);
        break;
+
+      case potential_conflict:
+       if (state -> i_am == primary)
+               dhcp_failover_send_update_request (state);
+       break;
+
       default:
        break;
     }
 
+    dhcp_failover_set_service_state (state);
+
     return ISC_R_SUCCESS;
 }
 
+isc_result_t dhcp_failover_peer_state_changed (dhcp_failover_state_t *state,
+                                              failover_message_t *msg)
+{
+       enum failover_state previous_state = state -> partner_state;
+       enum failover_state new_state = msg -> server_state;
+       int startupp = (msg -> server_flags & FTF_STARTUP) ? 1 : 0;
+
+       /* This isn't a state transition. */
+       if (previous_state == new_state)
+               return ISC_R_SUCCESS;
+
+       state -> partner_state = new_state;
+
+       log_info ("failover peer %s: peer moves from %s to %s",
+                 state -> name,
+                 dhcp_failover_state_name_print (previous_state),
+                 dhcp_failover_state_name_print (state -> partner_state));
+    
+       if (!write_failover_state (state) || !commit_leases ()) {
+               /* This is bad, but it's not fatal.  Of course, if we
+                  can't write to the lease database, we're not going to
+                  get much done anyway. */
+               log_error ("Unable to record current failover state for %s",
+                          state -> name);
+       }
+
+       /* Do any state transitions that are required as a result of the
+          peer's state transition. */
+
+       switch (state -> my_state) {
+             case normal:
+               switch (new_state) {
+                     case normal:
+                     case recover:
+                     case potential_conflict:
+                     case partner_down:
+                     case communications_interrupted:
+                     case resolution_interrupted:
+                       /* XXX this isn't really going to work, is it? */
+                       /* XXX probably should just goto the switch
+                          XXX statement below for
+                          XXX communications_interrupted. */
+                       dhcp_failover_set_state (state,
+                                                communications_interrupted);
+                       break;
+                          
+                     case shut_down:
+                       /* XXX This one is specified, but it's specified in
+                          XXX the documentation for the shut_down state,
+                          XXX not the normal state. */
+                       dhcp_failover_set_state (state, partner_down);
+
+                     case paused:
+                       dhcp_failover_set_state (state,
+                                                communications_interrupted);
+                       break;
+
+                     case recover_done:
+                       /* XXX what to do here? */
+                       break;
+
+                     case unknown_state:
+                       break;
+               }
+               break;
+
+             case recover:
+               switch (new_state) {
+                     case recover:
+                     case recover_done:
+                       log_error ("Failover partner %s in recover state",
+                                  state -> name);
+                       log_error ("while this server is also in recover");
+                       log_error ("state - this is an invalid state");
+                       log_error ("transition, so all processing for this");
+                       log_error ("failover peer is being terminated.");
+                       log_error ("You must intervene manually to resolve");
+                       log_error ("this problem.");
+                       dhcp_failover_set_state (state, shut_down);
+                       break;
+                                  
+                     case potential_conflict:
+                     case resolution_interrupted:
+                     case normal:
+                       dhcp_failover_set_state (state, potential_conflict);
+                       break;
+
+                     case partner_down:
+                     case communications_interrupted:
+                       /* We're supposed to send an update request at this
+                          point. */
+                       /* XXX we don't currently have code here to do any
+                          XXX clever detection of when we should send an
+                          XXX UPDREQALL message rather than an UPDREQ
+                          XXX message.   What to do, what to do? */
+                       dhcp_failover_send_update_request (state);
+                       break;
+
+                     case shut_down:
+                       /* XXX We're not explicitly told what to do in this
+                          XXX case, but this transition is consistent with
+                          XXX what is elsewhere in the draft. */
+                       dhcp_failover_set_state (state, partner_down);
+                       break;
+
+                       /* We can't really do anything in this case. */
+                     case paused:
+                       break;
+
+                     case unknown_state:
+                       break;
+               }
+               break;
+
+             case potential_conflict:
+               switch (new_state) {
+                     case normal:
+                       if (previous_state == potential_conflict &&
+                           state -> i_am == secondary)
+                               dhcp_failover_send_update_request (state);
+                       break;
+
+                     case recover:
+                     case recover_done:
+                     case potential_conflict:
+                     case partner_down:
+                     case communications_interrupted:
+                     case resolution_interrupted:
+                     case paused:
+                       break;
+
+                     case shut_down:
+                       dhcp_failover_set_state (state, partner_down);
+                       break;
+
+                     case unknown_state:
+                       break;
+               }
+               break;
+
+             case partner_down:
+               /* Take no action if other server is starting up. */
+               if (startupp)
+                       break;
+
+               switch (new_state) {
+                       /* This is where we should be. */
+                     case recover:
+                     case recover_done:
+                       break;
+
+                     case normal:
+                     case potential_conflict:
+                     case partner_down:
+                     case communications_interrupted:
+                     case resolution_interrupted:
+                       dhcp_failover_set_state (state, potential_conflict);
+                       break;
+
+                       /* These don't change anything. */
+                     case shut_down:
+                     case paused:
+                       break;
+
+                     case unknown_state:
+                       break;
+               }
+               break;
+
+             case communications_interrupted:
+               switch (new_state) {
+                     case recover:
+                     case paused:
+                       /* Stick with the status quo. */
+                       break;
+
+                     case normal:
+                     case communications_interrupted:
+                     case recover_done:
+                       /* XXX so we don't need to do this specially in
+                          XXX the CONNECT and CONNECTACK handlers. */
+                       dhcp_failover_set_state (state, normal);
+                       break;
+
+                     case potential_conflict:
+                     case partner_down:
+                     case resolution_interrupted:
+                       dhcp_failover_set_state (state, potential_conflict);
+                       break;
+
+                     case shut_down:
+                       dhcp_failover_set_state (state, partner_down);
+                       break;
+
+                     case unknown_state:
+                       break;
+               }
+               break;
+
+             case resolution_interrupted:
+               switch (new_state) {
+                     case normal:
+                     case recover:
+                     case potential_conflict:
+                     case partner_down:
+                     case communications_interrupted:
+                     case resolution_interrupted:
+                     case recover_done:
+                       dhcp_failover_set_state (state, potential_conflict);
+                       break;
+
+                     case shut_down:
+                       dhcp_failover_set_state (state, partner_down);
+                       break;
+
+                     case paused:
+                       break;
+
+                     case unknown_state:
+                       break;
+               }
+               break;
+
+             case recover_done:
+               switch (new_state) {
+                     case normal:
+                       dhcp_failover_set_state (state, normal);
+                       break;
+
+                     case recover:
+                     case potential_conflict:
+                     case partner_down:
+                     case communications_interrupted:
+                     case resolution_interrupted:
+                     case paused:
+                     case recover_done:
+                       break;
+
+                     case shut_down:
+                       dhcp_failover_set_state (state, partner_down);
+                       break;
+
+                     case unknown_state:
+                       break;
+               }
+               break;
+
+               /* We are essentially dead in the water when we're in
+                  either shut_down or paused states, and do not do any
+                  automatic state transitions. */
+             case shut_down:
+             case paused:
+               break;
+
+             case unknown_state:
+               break;  
+       }
+       
+       /* For now, just set the service state based on the peer's state
+          if necessary. */
+       dhcp_failover_set_service_state (state);
+
+       return ISC_R_SUCCESS;
+}
+
 int dhcp_failover_pool_rebalance (dhcp_failover_state_t *state)
 {
        int lts;
@@ -1672,6 +2046,14 @@ void dhcp_failover_reconnect (void *vs)
        }
 }
 
+void dhcp_failover_startup_timeout (void *vs)
+{
+       dhcp_failover_state_t *state = vs;
+       isc_result_t status;
+
+       dhcp_failover_set_service_state (state);
+}
+
 void dhcp_failover_listener_restart (void *vs)
 {
        dhcp_failover_state_t *state = vs;
@@ -2217,8 +2599,8 @@ const char *dhcp_failover_state_name_print (enum failover_state state)
              case communications_interrupted:
                return "communications-interrupted";
 
-             case potential_conflict_nic:
-               return "potential-conflict-nic";
+             case resolution_interrupted:
+               return "resolution-interrupted";
 
              case potential_conflict:
                return "potential-conflict";
@@ -2617,6 +2999,12 @@ void dhcp_failover_send_contact (void *vstate)
        status = (dhcp_failover_put_message
                  (link, link -> outer,
                   FTM_CONTACT,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
                   (failover_option_t *)0));
 
 #if defined (DEBUG_FAILOVER_MESSAGES)
@@ -2664,6 +3052,12 @@ isc_result_t dhcp_failover_send_connect (omapi_object_t *l)
        status = (dhcp_failover_put_message
                  (link, l -> outer,
                   FTM_CONNECT,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
                   dhcp_failover_make_option (FTO_SERVER_ADDR, FMA,
                                              state -> server_identifier.len,
                                              state -> server_identifier.data),
@@ -2720,6 +3114,16 @@ isc_result_t dhcp_failover_send_connectack (omapi_object_t *l,
        status = (dhcp_failover_put_message
                  (link, l -> outer,
                   FTM_CONNECTACK,
+                  (state
+                   ? dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                                state -> my_state)
+                   : &skip_failover_option),
+                  (state
+                   ? (dhcp_failover_make_option
+                      (FTO_SERVER_FLAGS, FMA,
+                       (state -> service_state == service_startup
+                        ? FTF_STARTUP : 0)))
+                   : &skip_failover_option),
                   (state
                    ? (dhcp_failover_make_option
                       (FTO_SERVER_ADDR, FMA,
@@ -2791,6 +3195,12 @@ isc_result_t dhcp_failover_send_disconnect (omapi_object_t *l,
        status = (dhcp_failover_put_message
                  (link, l -> outer,
                   FTM_DISCONNECT,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
                   dhcp_failover_make_option (FTO_REJECT_REASON,
                                              FMA, reason),
                   (message
@@ -2839,6 +3249,12 @@ isc_result_t dhcp_failover_send_bind_update (dhcp_failover_state_t *state,
        status = (dhcp_failover_put_message
                  (link, link -> outer,
                   FTM_BNDUPD,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
                   dhcp_failover_make_option (FTO_ASSIGNED_IP_ADDRESS, FMA,
                                              lease -> ip_addr.len,
                                              lease -> ip_addr.iabuf),
@@ -2912,6 +3328,12 @@ isc_result_t dhcp_failover_send_bind_ack (dhcp_failover_state_t *state,
        status = (dhcp_failover_put_message
                  (link, link -> outer,
                   FTM_BNDACK,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
                   dhcp_failover_make_option (FTO_ASSIGNED_IP_ADDRESS, FMA,
                                              sizeof msg -> assigned_addr,
                                              &msg -> assigned_addr),
@@ -2984,6 +3406,12 @@ isc_result_t dhcp_failover_send_poolreq (dhcp_failover_state_t *state)
        status = (dhcp_failover_put_message
                  (link, link -> outer,
                   FTM_POOLREQ,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
                   (failover_option_t *)0));
 
 #if defined (DEBUG_FAILOVER_MESSAGES)
@@ -3023,6 +3451,12 @@ isc_result_t dhcp_failover_send_poolresp (dhcp_failover_state_t *state,
        status = (dhcp_failover_put_message
                  (link, link -> outer,
                   FTM_POOLREQ,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
                   dhcp_failover_make_option (FTO_ADDRESSES_TRANSFERRED, FMA,
                                              leases),
                   (failover_option_t *)0));
@@ -3038,6 +3472,139 @@ isc_result_t dhcp_failover_send_poolresp (dhcp_failover_state_t *state,
        return status;
 }
 
+isc_result_t dhcp_failover_send_update_request (dhcp_failover_state_t *state)
+{
+       dhcp_failover_link_t *link;
+       isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)  
+       char obuf [64];
+       unsigned obufix = 0;
+       
+# define FMA obuf, &obufix, sizeof obuf
+       failover_print (FMA, "(updreq");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+       if (!state -> link_to_peer ||
+           state -> link_to_peer -> type != dhcp_type_failover_link)
+               return ISC_R_INVALIDARG;
+       link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+       if (!link -> outer || link -> outer -> type != omapi_type_connection)
+               return ISC_R_INVALIDARG;
+
+       status = (dhcp_failover_put_message
+                 (link, link -> outer,
+                  FTM_UPDREQ,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
+                  (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+       if (status != ISC_R_SUCCESS)
+               failover_print (FMA, " (failed)");
+       failover_print (FMA, ")");
+       if (obufix) {
+               log_debug ("%s", obuf);
+       }
+#endif
+       return status;
+}
+
+isc_result_t dhcp_failover_send_update_request_all (dhcp_failover_state_t
+                                                   *state)
+{
+       dhcp_failover_link_t *link;
+       isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)  
+       char obuf [64];
+       unsigned obufix = 0;
+       
+# define FMA obuf, &obufix, sizeof obuf
+       failover_print (FMA, "(updreqall");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+       if (!state -> link_to_peer ||
+           state -> link_to_peer -> type != dhcp_type_failover_link)
+               return ISC_R_INVALIDARG;
+       link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+       if (!link -> outer || link -> outer -> type != omapi_type_connection)
+               return ISC_R_INVALIDARG;
+
+       status = (dhcp_failover_put_message
+                 (link, link -> outer,
+                  FTM_UPDREQALL,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
+                  (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+       if (status != ISC_R_SUCCESS)
+               failover_print (FMA, " (failed)");
+       failover_print (FMA, ")");
+       if (obufix) {
+               log_debug ("%s", obuf);
+       }
+#endif
+       return status;
+}
+
+isc_result_t dhcp_failover_send_update_done (dhcp_failover_state_t *state)
+{
+       dhcp_failover_link_t *link;
+       isc_result_t status;
+#if defined (DEBUG_FAILOVER_MESSAGES)  
+       char obuf [64];
+       unsigned obufix = 0;
+       
+# define FMA obuf, &obufix, sizeof obuf
+       failover_print (FMA, "(upddone");
+#else
+# define FMA (char *)0, (unsigned *)0, 0
+#endif
+
+       if (!state -> link_to_peer ||
+           state -> link_to_peer -> type != dhcp_type_failover_link)
+               return ISC_R_INVALIDARG;
+       link = (dhcp_failover_link_t *)state -> link_to_peer;
+
+       if (!link -> outer || link -> outer -> type != omapi_type_connection)
+               return ISC_R_INVALIDARG;
+
+       status = (dhcp_failover_put_message
+                 (link, link -> outer,
+                  FTM_UPDDONE,
+                  dhcp_failover_make_option (FTO_SERVER_STATE, FMA,
+                                             state -> my_state),
+                  dhcp_failover_make_option
+                  (FTO_SERVER_FLAGS, FMA,
+                   (state -> service_state == service_startup
+                    ? FTF_STARTUP : 0)),
+                  (failover_option_t *)0));
+
+#if defined (DEBUG_FAILOVER_MESSAGES)
+       if (status != ISC_R_SUCCESS)
+               failover_print (FMA, " (failed)");
+       failover_print (FMA, ")");
+       if (obufix) {
+               log_debug ("%s", obuf);
+       }
+#endif
+       return status;
+}
+
 isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
                                               failover_message_t *msg)
 {
@@ -3108,14 +3675,17 @@ isc_result_t dhcp_failover_process_bind_update (dhcp_failover_state_t *state,
        }
 
        if (msg -> options_present & FTB_BINDING_STATUS) {
-               /* Check the requested transition to make sure it's
-                  valid. */
-               new_binding_state = (binding_state_transition_check
-                                    (lease, state, msg -> binding_status));
-               if (new_binding_state != msg -> binding_status) {
-                       dhcp_failover_send_bind_ack
-                               (state, lease, msg, FTR_FATAL_CONFLICT,
-                                "invalid binding state transition");
+               /* If we're in normal state, make sure the state transition
+                  we got is valid. */
+               if (state -> my_state == normal) {
+                       new_binding_state = (binding_state_transition_check
+                                            (lease, state,
+                                             msg -> binding_status));
+                       if (new_binding_state != msg -> binding_status) {
+                               dhcp_failover_send_bind_ack
+                                       (state, lease, msg, FTR_FATAL_CONFLICT,
+                                        "invalid binding state transition");
+                       }
                }
                lt -> next_binding_state = new_binding_state;
        }
@@ -3184,6 +3754,13 @@ isc_result_t dhcp_failover_process_bind_ack (dhcp_failover_state_t *state,
       unqueue:
        dhcp_failover_ack_queue_remove (state, lease);
 
+       /* If we are supposed to send an update done after we send
+          this lease, go ahead and send it. */
+       if (state -> send_update_done == lease) {
+               lease_dereference (&state -> send_update_done, MDL);
+               dhcp_failover_send_update_done (state);
+       }
+
        /* If there are updates pending, we've created space to send at
           least one. */
        dhcp_failover_send_updates (state);
@@ -3201,6 +3778,176 @@ isc_result_t dhcp_failover_process_bind_ack (dhcp_failover_state_t *state,
        goto out;
 }
 
+isc_result_t dhcp_failover_generate_update_queue (dhcp_failover_state_t *state,
+                                                 int everythingp)
+{
+       struct shared_network *s;
+       struct pool *p;
+       struct lease *l, *n;
+       int i;
+       struct lease **lptr [5];
+#define FREE_LEASES 0
+#define ACTIVE_LEASES 1
+#define EXPIRED_LEASES 2
+#define ABANDONED_LEASES 3
+#define BACKUP_LEASES 4
+
+       /* First remove everything from the update and ack queues. */
+       l = n = (struct lease *)0;
+       if (state -> update_queue_head) {
+               lease_reference (&l, state -> update_queue_head, MDL);
+               lease_dereference (&state -> update_queue_head, MDL);
+               do {
+                       l -> flags &= ~ON_UPDATE_QUEUE;
+                       if (l -> next_pending)
+                               lease_reference (&n,
+                                                l -> next_pending, MDL);
+                       lease_dereference (&l, MDL);
+                       if (n) {
+                               lease_reference (&l, n, MDL);
+                               lease_dereference (&n, MDL);
+                       }
+               } while (l);
+               lease_dereference (&state -> update_queue_tail, MDL);
+       }
+
+       if (state -> ack_queue_head) {
+               lease_reference (&l, state -> ack_queue_head, MDL);
+               lease_dereference (&state -> ack_queue_head, MDL);
+               do {
+                       l -> flags &= ~ON_ACK_QUEUE;
+                       if (l -> next_pending)
+                               lease_reference (&n,
+                                                l -> next_pending, MDL);
+                       lease_dereference (&l, MDL);
+                       if (n) {
+                               lease_reference (&l, n, MDL);
+                               lease_dereference (&n, MDL);
+                       }
+               } while (l);
+               lease_dereference (&state -> ack_queue_tail, MDL);
+       }
+       if (state -> send_update_done)
+               lease_dereference (&state -> send_update_done, MDL);
+                       
+       /* Loop through each pool in each shared network and call the
+          expiry routine on the pool. */
+       for (s = shared_networks; s; s = s -> next) {
+           for (p = s -> pools; p; p = p -> next) {
+               lptr [FREE_LEASES] = &p -> free;
+               lptr [ACTIVE_LEASES] = &p -> active;
+               lptr [EXPIRED_LEASES] = &p -> expired;
+               lptr [ABANDONED_LEASES] = &p -> abandoned;
+               lptr [BACKUP_LEASES] = &p -> backup;
+
+               for (i = FREE_LEASES; i <= BACKUP_LEASES; i++) {
+                   for (l = *(lptr [i]); l; l = l -> next) {
+                       if (p -> failover_peer == state &&
+                           (everythingp ||
+                            l -> tstp > l -> tsfp)) {
+                               dhcp_failover_queue_update (l, 0);
+                       }
+                   }
+               }
+           }
+       }
+       return ISC_R_SUCCESS;
+}
+
+isc_result_t
+dhcp_failover_process_update_request (dhcp_failover_state_t *state,
+                                     failover_message_t *msg)
+{
+       /* Make sure we catch the peer state change *first*. */
+       dhcp_failover_peer_state_changed (state, msg);
+
+       /* Generate a fresh update queue. */
+       dhcp_failover_generate_update_queue (state, 0);
+
+       /* If there's anything on the update queue (there shouldn't be
+          anything on the ack queue), trigger an update done message
+          when we get an ack for that lease. */
+       if (state -> update_queue_tail) {
+               lease_reference (&state -> send_update_done,
+                                state -> update_queue_tail, MDL);
+               dhcp_failover_send_updates (state);
+       } else {
+               /* Otherwise, there are no updates to send, so we can
+                  just send an UPDDONE message immediately. */
+               dhcp_failover_send_update_done (state);
+       }
+
+       return ISC_R_SUCCESS;
+}
+
+isc_result_t
+dhcp_failover_process_update_request_all (dhcp_failover_state_t *state,
+                                         failover_message_t *msg)
+{
+       /* Make sure we catch the peer state change *first*. */
+       dhcp_failover_peer_state_changed (state, msg);
+
+       /* Generate a fresh update queue that includes every lease. */
+       dhcp_failover_generate_update_queue (state, 1);
+
+       if (state -> update_queue_tail) {
+               lease_reference (&state -> send_update_done,
+                                state -> update_queue_tail, MDL);
+               dhcp_failover_send_updates (state);
+       } else {
+               /* This should really never happen, but it could happen
+                  on a server that currently has no leases configured. */
+               dhcp_failover_send_update_done (state);
+       }
+
+       return ISC_R_SUCCESS;
+}
+
+isc_result_t
+dhcp_failover_process_update_done (dhcp_failover_state_t *state,
+                                     failover_message_t *msg)
+{
+       /* Make sure we catch the peer state change *first*. */
+       dhcp_failover_peer_state_changed (state, msg);
+
+       switch (state -> my_state) {
+             case unknown_state:
+             case partner_down:
+             case normal:
+             case communications_interrupted:
+             case resolution_interrupted:
+             case shut_down:
+             case paused:
+             case recover_done:
+               break;  /* shouldn't happen. */
+
+               /* We got the UPDDONE, so we can go into normal state! */
+             case potential_conflict:
+               dhcp_failover_set_state (state, normal);
+               break;
+
+             case recover:
+               if (state -> my_stos + state -> mclt > cur_time)
+                       add_timeout ((int)(state -> my_stos + state -> mclt),
+                                    dhcp_failover_recover_done,
+                                    state,
+                                    (tvref_t)omapi_object_reference,
+                                    (tvunref_t)
+                                    omapi_object_dereference);
+               else
+                       dhcp_failover_recover_done (state);
+       }
+
+       return ISC_R_SUCCESS;
+}
+
+void dhcp_failover_recover_done (void *sp)
+{
+       dhcp_failover_state_t *state = sp;
+
+       dhcp_failover_set_state (state, recover_done);
+}
+
 #if defined (DEBUG_FAILOVER_MESSAGES)
 /* Print hunks of failover messages, doing line breaks as appropriate.
    Note that this assumes syslog is being used, rather than, e.g., the
@@ -3459,33 +4206,66 @@ binding_state_t binding_state_transition_check (struct lease *lease,
        return lease -> binding_state;
 }
 
-int lease_mine_to_extend (struct lease *lease)
+/* We can reallocate a lease under the following circumstances:
+
+   (1) It belongs to us - it's FTS_FREE, and we're primary, or it's
+       FTS_BACKUP, and we're secondary.
+   (2) We're in partner_down, and the lease is not active, and we
+       can be sure that the other server didn't make it active.
+       We can only be sure that the server didn't make it active
+       when we are in the partner_down state and one of the following
+       two conditions holds:
+       (a) in the case that the time sent from the peer is earlier than
+           the time we entered the partner_down state, at least MCLT has
+          gone by since we entered partner_down, or
+       (b) in the case that the time sent from the peer is later than
+           the time when we entered partner_down, the current time is
+          later than the time sent from the peer by at least MCLT. */
+
+int lease_mine_to_reallocate (struct lease *lease)
 {
        dhcp_failover_state_t *peer;
 
        if (lease && lease -> pool &&
            lease -> pool -> failover_peer) {
                peer = lease -> pool -> failover_peer;
-/* XXX This does't seem right - either peer can extend a lease to MCLT. */
-               if (peer -> my_state == normal) { /* XXX */
-                       switch (lease -> binding_state) {
-                             case FTS_ACTIVE:
-                             case FTS_ABANDONED:
-                             case FTS_RESET:
-                             case FTS_RELEASED:
-                             case FTS_EXPIRED:
-                             case FTS_FREE:
-                             case FTS_BOOTP:
-                             case FTS_RESERVED:
-                               if (peer -> i_am == secondary)
-                                       return 0;
-                               break;
-                             case FTS_BACKUP:
-                               if (peer -> i_am == primary)
-                                       return 0;
-                               break;
-                       }
+               switch (lease -> binding_state) {
+                     case FTS_ACTIVE:
+                       return 0;
+
+                     case FTS_FREE:
+                       if (peer -> i_am == primary)
+                               return 1;
+                       if (peer -> service_state == service_partner_down &&
+                           (lease -> tsfp < peer -> my_stos
+                            ? peer -> my_stos + peer -> mclt < cur_time
+                            : lease -> tsfp + peer -> mclt < cur_time))
+                               return 1;
+                       return 0;
+
+                     case FTS_ABANDONED:
+                     case FTS_RESET:
+                     case FTS_RELEASED:
+                     case FTS_EXPIRED:
+                     case FTS_BOOTP:
+                     case FTS_RESERVED:
+                       if (peer -> service_state == service_partner_down &&
+                           (lease -> tsfp < peer -> my_stos
+                            ? peer -> my_stos + peer -> mclt < cur_time
+                            : lease -> tsfp + peer -> mclt < cur_time))
+                               return 1;
+                       return 0;
+                     case FTS_BACKUP:
+                       if (peer -> i_am == secondary)
+                               return 1;
+                       if (peer -> service_state == service_partner_down &&
+                           (lease -> tsfp < peer -> my_stos
+                            ? peer -> my_stos + peer -> mclt < cur_time
+                            : lease -> tsfp + peer -> mclt < cur_time))
+                               return 1;
+                       return 0;
                }
+               return 0;
        }
        return 1;
 }