]> git.ipfire.org Git - thirdparty/kea.git/commitdiff
[#1403] Servers exchange ha-sync-complete-notify
authorMarcin Siodelski <marcin@isc.org>
Tue, 3 Aug 2021 10:17:39 +0000 (12:17 +0200)
committerMarcin Siodelski <marcin@isc.org>
Tue, 21 Sep 2021 09:25:19 +0000 (11:25 +0200)
A server finishing the lease database synchronization sends the
ha-sync-complete-notify command to the partner. If the partner is in the
partner-down state, it stops allocating leases and sends a heartbeat to
see if the synchronized server is available. If that server is unavailable,
the partner resumes DHCP service. Otherwise, it transitions to the normal
operation state.

src/hooks/dhcp/high_availability/ha_callouts.cc
src/hooks/dhcp/high_availability/ha_impl.cc
src/hooks/dhcp/high_availability/ha_impl.h
src/hooks/dhcp/high_availability/ha_messages.mes
src/hooks/dhcp/high_availability/ha_service.cc
src/hooks/dhcp/high_availability/ha_service.h
src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc

index 13b91f67c2ffcc27f04da4b71397419c9fc8f6e4..7c3546d34e605f57359d82fba3cfbef8a8f7246d 100644 (file)
@@ -283,6 +283,18 @@ int ha_reset_command(CalloutHandle& handle) {
     return (0);
 }
 
+/// @brief ha-sync-complete-notify command handler; logs handler failures and returns 0.
+int sync_complete_notify_command(CalloutHandle& handle) {
+    try {
+        impl->syncCompleteNotifyHandler(handle);
+    } catch (const std::exception& ex) {
+        LOG_ERROR(ha_logger, HA_SYNC_COMPLETE_NOTIFY_HANDLER_FAILED)
+            .arg(ex.what());
+    }
+
+    return (0);
+}
+
 /// @brief This function is called when the library is loaded.
 ///
 /// @param handle library handle
@@ -321,6 +333,7 @@ int load(LibraryHandle& handle) {
         handle.registerCommandCallout("ha-maintenance-start", maintenance_start_command);
         handle.registerCommandCallout("ha-maintenance-cancel", maintenance_cancel_command);
         handle.registerCommandCallout("ha-reset", ha_reset_command);
+        handle.registerCommandCallout("ha-sync-complete-notify", sync_complete_notify_command);
 
     } catch (const std::exception& ex) {
         LOG_ERROR(ha_logger, HA_CONFIGURATION_FAILED)
index eb5b5d219ee9d49d7f236bda5215bc55340eb8c8..1c8d811e1727d7fbd959f112861c2f8e6b3ab2e4 100644 (file)
@@ -490,5 +490,11 @@ HAImpl::haResetHandler(hooks::CalloutHandle& callout_handle) {
     callout_handle.setArgument("response", response);
 }
 
+void
+HAImpl::syncCompleteNotifyHandler(hooks::CalloutHandle& callout_handle) {
+    ConstElementPtr response = service_->processSyncCompleteNotify();
+    callout_handle.setArgument("response", response); // Result is returned via the "response" argument.
+}
+
 } // end of namespace isc::ha
 } // end of namespace isc
index 60dd3bb77f9e7a3bbabf196b11ecfd43ef80353f..3d6611190b5d58975c36aee623124b3244c04147 100644 (file)
@@ -164,6 +164,11 @@ public:
     /// @param callout_handle Callout handle provided to the callout.
     void haResetHandler(hooks::CalloutHandle& callout_handle);
 
+    /// @brief Implements handler for the ha-sync-complete-notify command.
+    ///
+    /// @param callout_handle Callout handle; receives the "response" argument.
+    void syncCompleteNotifyHandler(hooks::CalloutHandle& callout_handle);
+
 protected:
 
     /// @brief Holds parsed configuration.
index 25377955b672746505c971f2e5520dd6f2eeba77..662d77d970390d5c6f1748d32bfd22f3a66fff86 100644 (file)
@@ -554,6 +554,22 @@ This error message is issued to indicate that the lease database synchronization
 failed. The first argument provides the partner server's name. The second argument
 provides a reason for the failure.
 
+% HA_SYNC_COMPLETE_NOTIFY_COMMUNICATIONS_FAILED failed to send ha-sync-complete-notify to %1: %2
+This error message indicates that there was a problem in communication with an
+HA peer while sending the ha-sync-complete-notify command. The first argument
+provides the remote server's name. The second argument provides a reason for
+failure.
+
+% HA_SYNC_COMPLETE_NOTIFY_FAILED error processing ha-sync-complete-notify command on %1: %2
+This error message indicates that a peer returned an error status code
+in response to the ha-sync-complete-notify command. The first argument provides
+the remote server's name. The second argument provides a reason for failure.
+
+% HA_SYNC_COMPLETE_NOTIFY_HANDLER_FAILED ha-sync-complete-notify command failed: %1
+This error message is issued to indicate that the ha-sync-complete-notify command
+handler failed while processing the command. The argument provides the reason for
+failure.
+
 % HA_SYNC_HANDLER_FAILED ha-sync command failed: %1
 This error message is issued to indicate that the ha-sync command handler
 failed while processing the command. The argument provides the reason for
index 370262967fa8fa59cc1818b3fcea49ef3fc461d0..b57cd68fb474437cd881fe519a90945e4f86b660 100644 (file)
@@ -17,6 +17,7 @@
 #include <dhcpsrv/cfgmgr.h>
 #include <dhcpsrv/lease_mgr.h>
 #include <dhcpsrv/lease_mgr_factory.h>
+#include <exceptions/exceptions.h>
 #include <http/date_time.h>
 #include <http/response_json.h>
 #include <http/post_request_json.h>
@@ -38,6 +39,17 @@ using namespace isc::log;
 using namespace isc::util;
 namespace ph = std::placeholders;
 
+namespace {
+
+/// @brief Exception thrown when the partner's response reports the sent command as unsupported.
+class CommandUnsupportedError : public CtrlChannelError {
+public:
+    CommandUnsupportedError(const char* file, size_t line, const char* what) :
+        CtrlChannelError(file, line, what) {}
+};
+
+} // end of anonymous namespace
+
 namespace isc {
 namespace ha {
 
@@ -49,13 +61,15 @@ const int HAService::HA_MAINTENANCE_NOTIFY_EVT;
 const int HAService::HA_MAINTENANCE_START_EVT;
 const int HAService::HA_MAINTENANCE_CANCEL_EVT;
 const int HAService::HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED;
+const int HAService::HA_SYNCED_PARTNER_UNAVAILABLE_EVT;
 
 HAService::HAService(const IOServicePtr& io_service, const NetworkStatePtr& network_state,
                      const HAConfigPtr& config, const HAServerType& server_type)
     : io_service_(io_service), network_state_(network_state), config_(config),
       server_type_(server_type), client_(), listener_(), communication_state_(),
       query_filter_(config), mutex_(), pending_requests_(),
-      lease_update_backlog_(config->getDelayedUpdatesLimit()) {
+      lease_update_backlog_(config->getDelayedUpdatesLimit()),
+      sync_complete_notified_(false) {
 
     if (server_type == HAServerType::DHCPv4) {
         communication_state_.reset(new CommunicationState4(io_service_, config));
@@ -123,6 +137,7 @@ HAService::defineEvents() {
     defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT");
     defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT");
     defineEvent(HA_MAINTENANCE_CANCEL_EVT, "HA_MAINTENANCE_CANCEL_EVT");
+    defineEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT, "HA_SYNCED_PARTNER_UNAVAILABLE_EVT");
 }
 
 void
@@ -136,6 +151,7 @@ HAService::verifyEvents() {
     getEvent(HA_MAINTENANCE_NOTIFY_EVT);
     getEvent(HA_MAINTENANCE_START_EVT);
     getEvent(HA_MAINTENANCE_CANCEL_EVT);
+    getEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT);
 }
 
 void
@@ -453,6 +469,14 @@ HAService::partnerDownStateHandler() {
             // receiving the ha-maintenance-start command let's log it.
             LOG_INFO(ha_logger, HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN);
         }
+
+    } else if (getLastEvent() == HA_SYNCED_PARTNER_UNAVAILABLE_EVT) {
+        // Partner sent the ha-sync-complete-notify command to indicate that
+        // it has successfully synchronized its lease database but this server
+        // was unable to send heartbeat to the partner. Enable the DHCP service
+        // and continue serving the clients in the partner-down state until the
+        // communication with the partner is fixed.
+        adjustNetworkState();
     }
 
     scheduleHeartbeat();
@@ -1578,6 +1602,8 @@ HAService::processHAReset() {
 void
 HAService::asyncSendHeartbeat() {
     HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
+    bool sync_complete_notified = sync_complete_notified_;
+    sync_complete_notified_ = false;
 
     // Create HTTP/1.1 request including our command.
     PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
@@ -1595,7 +1621,7 @@ HAService::asyncSendHeartbeat() {
     client_->asyncSendRequest(partner_config->getUrl(),
                               partner_config->getTlsContext(),
                               request, response,
-        [this, partner_config]
+        [this, partner_config, sync_complete_notified]
             (const boost::system::error_code& ec,
              const HttpResponsePtr& response,
              const std::string& error_str) {
@@ -1682,12 +1708,18 @@ HAService::asyncSendHeartbeat() {
                 }
             }
 
+            startHeartbeat();
+            // Even though the partner notified us about the synchronization completion,
+            // we still can't communicate with the partner. Let's continue serving
+            // the clients until the link is fixed.
+            if (sync_complete_notified && !heartbeat_success) {
+                postNextEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT);
+            }
             // Whatever the result of the heartbeat was, the state machine needs
             // to react to this. Let's run the state machine until the state machine
             // finds that some new events are required, i.e. next heartbeat or
             // lease update.  The runModel() may transition to another state, schedule
             // asynchronous tasks etc. Then it returns control to the DHCP server.
-            startHeartbeat();
             runModel(HA_HEARTBEAT_COMPLETE_EVT);
         },
         HttpClient::RequestTimeout(TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST),
@@ -1749,6 +1781,7 @@ HAService::asyncDisableDHCPService(HttpClient& http_client,
              // when non-success error code is returned in the response carried
              // in the HTTP message or if the JSON response is otherwise broken.
 
+             int rcode = 0;
              std::string error_message;
 
              // Handle first two groups of errors.
@@ -1762,7 +1795,6 @@ HAService::asyncDisableDHCPService(HttpClient& http_client,
 
                  // Handle third group of errors.
                  try {
-                     int rcode = 0;
                      static_cast<void>(verifyAsyncResponse(response, rcode));
 
                  } catch (const std::exception& ex) {
@@ -1782,7 +1814,8 @@ HAService::asyncDisableDHCPService(HttpClient& http_client,
              // Invoke post request action if it was specified.
              if (post_request_action) {
                  post_request_action(error_message.empty(),
-                                     error_message);
+                                     error_message,
+                                     rcode);
              }
         },
         HttpClient::RequestTimeout(TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST),
@@ -1825,6 +1858,7 @@ HAService::asyncEnableDHCPService(HttpClient& http_client,
              // when non-success error code is returned in the response carried
              // in the HTTP message or if the JSON response is otherwise broken.
 
+             int rcode = 0;
              std::string error_message;
 
              // Handle first two groups of errors.
@@ -1838,7 +1872,6 @@ HAService::asyncEnableDHCPService(HttpClient& http_client,
 
                  // Handle third group of errors.
                  try {
-                     int rcode = 0;
                      static_cast<void>(verifyAsyncResponse(response, rcode));
 
                  } catch (const std::exception& ex) {
@@ -1858,7 +1891,8 @@ HAService::asyncEnableDHCPService(HttpClient& http_client,
              // Invoke post request action if it was specified.
              if (post_request_action) {
                  post_request_action(error_message.empty(),
-                                     error_message);
+                                     error_message,
+                                     rcode);
              }
         },
         HttpClient::RequestTimeout(TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST),
@@ -1909,7 +1943,7 @@ HAService::asyncSyncLeases(http::HttpClient& http_client,
     asyncDisableDHCPService(http_client, server_name, max_period,
                             [this, &http_client, server_name, max_period, last_lease,
                              post_sync_action, dhcp_disabled]
-                            (const bool success, const std::string& error_message) {
+                            (const bool success, const std::string& error_message, const int) {
 
         // If we have successfully disabled the DHCP service on the peer,
         // we can start fetching the leases.
@@ -2149,21 +2183,66 @@ HAService::synchronize(std::string& status_message, const std::string& server_na
         // we need to re-enable the DHCP service on the peer if the
         // DHCP service was disabled in the course of synchronization.
         if (dhcp_disabled) {
-            asyncEnableDHCPService(client, server_name,
-                                   [&](const bool success,
-                                       const std::string& error_message) {
-                // It is possible that we have already recorded an error
-                // message while synchronizing the lease database. Don't
-                // override the existing error message.
-                if (!success && status_message.empty()) {
-                    status_message = error_message;
-                }
+            // If the synchronization was completed successfully let's
+            // try to send the ha-sync-complete-notify command to the
+            // partner.
+            if (success) {
+                asyncSyncCompleteNotify(client, server_name,
+                                        [&](const bool success,
+                                            const std::string& error_message,
+                                            const int rcode) {
+                    // This command may not be supported by the partner when it
+                    // runs an older Kea version. In that case, send the dhcp-enable
+                    // command as in previous Kea version.
+                    if (rcode == CONTROL_RESULT_COMMAND_UNSUPPORTED) {
+                        asyncEnableDHCPService(client, server_name,
+                                               [&](const bool success,
+                                                   const std::string& error_message,
+                                                   const int rcode) {
+                            // It is possible that we have already recorded an error
+                            // message while synchronizing the lease database. Don't
+                            // override the existing error message.
+                            if (!success && status_message.empty()) {
+                                status_message = error_message;
+                            }
+
+                            // The synchronization process is completed, so let's break
+                            // the IO service so as we can return the response to the
+                            // controlling client.
+                            io_service.stop();
+                        });
+
+                    } else {
+                        // ha-sync-complete-notify command was delivered to the partner.
+                        // The synchronization process ends here.
+                        if (!success && status_message.empty()) {
+                            status_message = error_message;
+                        }
 
-                // The synchronization process is completed, so let's break
-                // the IO service so as we can return the response to the
-                // controlling client.
-                io_service.stop();
-            });
+                        io_service.stop();
+                    }
+                });
+
+            } else {
+                // Synchronization was unsuccessful. Send the dhcp-enable command to
+                // re-enable the DHCP service. Note that we don't send the
+                // ha-sync-complete-notify command in this case. It is only sent in
+                // the case when synchronization ends successfully.
+                asyncEnableDHCPService(client, server_name,
+                                       [&](const bool success,
+                                           const std::string& error_message,
+                                           const int rcode) {
+                    if (!success && status_message.empty()) {
+                        status_message = error_message;
+                    }
+
+                    // The synchronization process is completed, so let's break
+                    // the IO service so as we can return the response to the
+                    // controlling client.
+                    io_service.stop();
+
+                });
+            }
 
         } else {
             // Also stop IO service if there is no need to enable DHCP
@@ -2213,7 +2292,7 @@ HAService::asyncSendLeaseUpdatesFromBacklog(HttpClient& http_client,
                                             const HAConfig::PeerConfigPtr& config,
                                             PostRequestCallback post_request_action) {
     if (lease_update_backlog_.size() == 0) {
-        post_request_action(true, "");
+        post_request_action(true, "", CONTROL_RESULT_SUCCESS);
         return;
     }
 
@@ -2250,6 +2329,7 @@ HAService::asyncSendLeaseUpdatesFromBacklog(HttpClient& http_client,
              const HttpResponsePtr& response,
              const std::string& error_str) {
 
+             int rcode = 0;
              std::string error_message;
 
              if (ec || !error_str.empty()) {
@@ -2261,7 +2341,6 @@ HAService::asyncSendLeaseUpdatesFromBacklog(HttpClient& http_client,
              } else {
                  // Handle third group of errors.
                  try {
-                    int rcode = 0;
                     auto args = verifyAsyncResponse(response, rcode);
                  } catch (const std::exception& ex) {
                      error_message = ex.what();
@@ -2279,7 +2358,7 @@ HAService::asyncSendLeaseUpdatesFromBacklog(HttpClient& http_client,
              if (error_message.empty()) {
                  asyncSendLeaseUpdatesFromBacklog(http_client, config, post_request_action);
              } else {
-                 post_request_action(error_message.empty(), error_message);
+                 post_request_action(error_message.empty(), error_message, rcode);
              }
    });
 }
@@ -2302,7 +2381,7 @@ HAService::sendLeaseUpdatesFromBacklog() {
         .arg(remote_config->getName());
 
     asyncSendLeaseUpdatesFromBacklog(client, remote_config,
-                                     [&](const bool success, const std::string&) {
+                                     [&](const bool success, const std::string&, const int) {
         io_service.stop();
         updates_successful = success;
     });
@@ -2350,6 +2429,7 @@ HAService::asyncSendHAReset(HttpClient& http_client,
              const HttpResponsePtr& response,
              const std::string& error_str) {
 
+             int rcode = 0;
              std::string error_message;
 
              if (ec || !error_str.empty()) {
@@ -2361,7 +2441,6 @@ HAService::asyncSendHAReset(HttpClient& http_client,
              } else {
                  // Handle third group of errors.
                  try {
-                    int rcode = 0;
                     auto args = verifyAsyncResponse(response, rcode);
                  } catch (const std::exception& ex) {
                      error_message = ex.what();
@@ -2371,7 +2450,7 @@ HAService::asyncSendHAReset(HttpClient& http_client,
                  }
              }
 
-             post_request_action(error_message.empty(), error_message);
+             post_request_action(error_message.empty(), error_message, rcode);
    });
 }
 
@@ -2383,7 +2462,7 @@ HAService::sendHAReset() {
     bool reset_successful = true;
 
     asyncSendHAReset(client, remote_config,
-                     [&](const bool success, const std::string&) {
+                     [&](const bool success, const std::string&, const int) {
         io_service.stop();
         reset_successful = success;
     });
@@ -2677,6 +2756,98 @@ HAService::processMaintenanceCancel() {
                          "Server maintenance successfully canceled."));
 }
 
+void
+HAService::asyncSyncCompleteNotify(HttpClient& http_client,
+                                   const std::string& server_name,
+                                   PostRequestCallback post_request_action) {
+    HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
+
+    // Create HTTP/1.1 request including our command.
+    PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
+        (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
+         HostHttpHeader(remote_config->getUrl().getHostname()));
+
+    remote_config->addBasicAuthHttpHeader(request);
+    request->setBodyAsJson(CommandCreator::createSyncCompleteNotify(server_type_));
+    request->finalize();
+
+    // Response object should also be created because the HTTP client needs
+    // to know the type of the expected response.
+    HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
+
+    // Schedule asynchronous HTTP request.
+    http_client.asyncSendRequest(remote_config->getUrl(),
+                                 remote_config->getTlsContext(),
+                                 request, response,
+        [this, remote_config, post_request_action]
+            (const boost::system::error_code& ec,
+             const HttpResponsePtr& response,
+             const std::string& error_str) {
+
+             // There are three possible groups of errors. One is the IO error
+             // causing issues in communication with the peer. Another one is an
+             // HTTP parsing error. The last type of error is when non-success
+             // error code is returned in the response carried in the HTTP message
+             // or if the JSON response is otherwise broken.
+
+             int rcode = 0;
+             std::string error_message;
+
+             // Handle first two groups of errors.
+             if (ec || !error_str.empty()) {
+                 error_message = (ec ? ec.message() : error_str);
+                 LOG_ERROR(ha_logger, HA_SYNC_COMPLETE_NOTIFY_COMMUNICATIONS_FAILED)
+                     .arg(remote_config->getLogLabel())
+                     .arg(error_message);
+
+             } else {
+
+                 // Handle third group of errors. An unsupported command is reported via rcode only.
+                 try {
+                     static_cast<void>(verifyAsyncResponse(response, rcode));
+
+                 } catch (const CommandUnsupportedError& ex) {
+                     rcode = CONTROL_RESULT_COMMAND_UNSUPPORTED;
+
+                 } catch (const std::exception& ex) {
+                     error_message = ex.what();
+                     LOG_ERROR(ha_logger, HA_SYNC_COMPLETE_NOTIFY_FAILED)
+                         .arg(remote_config->getLogLabel())
+                         .arg(error_message);
+                 }
+             }
+
+             // If the request failed (I/O or HTTP error, or an error response other
+             // than "command unsupported"), mark the partner as unavailable.
+             if (!error_message.empty()) {
+                 communication_state_->setPartnerState("unavailable");
+             }
+
+             // Invoke post request action if it was specified.
+             if (post_request_action) {
+                 post_request_action(error_message.empty(),
+                                     error_message,
+                                     rcode);
+             }
+        },
+        HttpClient::RequestTimeout(TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST),
+        std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
+        std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
+        std::bind(&HAService::clientCloseHandler, this, ph::_1)
+    );
+}
+
+ConstElementPtr
+HAService::processSyncCompleteNotify() {
+    if (getCurrState() == HA_PARTNER_DOWN_ST) {
+        sync_complete_notified_ = true; // Read and cleared by asyncSendHeartbeat().
+    } else {
+        localEnableDHCPService();
+    }
+    return (createAnswer(CONTROL_RESULT_SUCCESS,
+                         "Server successfully notified about the synchronization completion."));
+}
+
 ConstElementPtr
 HAService::verifyAsyncResponse(const HttpResponsePtr& response, int& rcode) {
     // Set the return code to error in case of early throw.
@@ -2729,7 +2900,12 @@ HAService::verifyAsyncResponse(const HttpResponsePtr& response, int& rcode) {
         }
         // Include an error code.
         s << "error code " << rcode;
-        isc_throw(CtrlChannelError, s.str());
+
+        if (rcode == CONTROL_RESULT_COMMAND_UNSUPPORTED) {
+            isc_throw(CommandUnsupportedError, s.str());
+        } else {
+            isc_throw(CtrlChannelError, s.str());
+        }
     }
 
     return (args);
index 8eaaefdd950fbae7347e7ca57b7a6e666337064c..288a03671fbbbdad9a9558721c4397cc1bccb804 100644 (file)
@@ -64,6 +64,10 @@ public:
     /// ha-maintenance-cancel command received.
     static const int HA_MAINTENANCE_CANCEL_EVT = SM_DERIVED_EVENT_MIN + 7;
 
+    /// The heartbeat command failed after receiving ha-sync-complete-notify
+    /// command from the partner.
+    static const int HA_SYNCED_PARTNER_UNAVAILABLE_EVT = SM_DERIVED_EVENT_MIN + 8;
+
     /// Control result returned in response to ha-maintenance-notify.
     static const int HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED = 1001;
 
@@ -74,7 +78,8 @@ protected:
     ///
     /// The first argument indicates if the operation passed (when true).
     /// The second argument holds error message.
-   typedef std::function<void(const bool, const std::string&)> PostRequestCallback;
+    /// The third argument holds control status returned.
+   typedef std::function<void(const bool, const std::string&, const int)> PostRequestCallback;
 
     /// @brief Callback invoked when lease database synchronization is complete.
     ///
@@ -855,6 +860,10 @@ protected:
     ///
     /// It instructs the server to disable the DHCP service on the HA peer,
     /// fetch all leases from the peer and update the local lease database.
+    /// It sends ha-sync-complete-notify command to the partner when the
+    /// synchronization completes successfully. If the partner does not
+    /// support this command, it sends dhcp-enable command to enable
+    /// the DHCP service on the partner.
     ///
     /// This method creates its own instances of the HttpClient and IOService and
     /// invokes IOService::run().
@@ -1009,6 +1018,36 @@ public:
     /// any of the worker threads.
     void checkPermissionsClientAndListener();
 
+protected:
+
+    /// @brief Schedules asynchronous "ha-sync-complete-notify" command to the
+    /// specified server.
+    ///
+    /// @param http_client reference to the client to be used to communicate
+    /// with the other server.
+    /// @param server_name name of the server to which the command should be
+    /// sent.
+    /// @param post_request_action callback invoked with the success flag, error
+    /// message and result code when the request completes; may be empty.
+    void asyncSyncCompleteNotify(http::HttpClient& http_client,
+                                 const std::string& server_name,
+                                 PostRequestCallback post_request_action);
+
+public:
+
+    /// @brief Processes ha-sync-complete-notify command and returns a response.
+    ///
+    /// A server finishing a lease database synchronization may notify its
+    /// partner about it with this command. This function implements reception
+    /// and processing of the command.
+    ///
+    /// It enables DHCP service unless the server is in the partner-down state.
+    /// In this state, the server will first have to check connectivity with
+    /// the partner and transition to a state in which it will send lease updates.
+    ///
+    /// @return Pointer to the response to the ha-sync-complete-notify command.
+    data::ConstElementPtr processSyncCompleteNotify();
+
     /// @brief Start the client and(or) listener instances.
     ///
     /// When HA+MT is enabled it starts the client's thread pool
@@ -1052,6 +1091,7 @@ protected:
     /// @param [out] rcode result found in the response.
     /// @return Pointer to the response arguments.
     /// @throw CtrlChannelError if response is invalid or contains an error.
+    /// @throw CommandUnsupportedError if sent command is unsupported.
     data::ConstElementPtr verifyAsyncResponse(const http::HttpResponsePtr& response,
                                               int& rcode);
 
@@ -1241,6 +1281,15 @@ protected:
     /// the communication-recovery state and is temporarily unable to send
     /// lease updates to the partner.
     LeaseUpdateBacklog lease_update_backlog_;
+
+    /// @brief An indicator that a partner sent ha-sync-complete-notify command.
+    ///
+    /// This indicator is set when the partner finished synchronization. It blocks
+    /// enabling DHCP service in the partner-down state. The flag is read and
+    /// cleared when the next heartbeat is sent to the partner. If that heartbeat
+    /// fails, HA_SYNCED_PARTNER_UNAVAILABLE_EVT is posted and the server enables
+    /// DHCP service to continue the service.
+    bool sync_complete_notified_;
 };
 
 /// @brief Pointer to the @c HAService class.
index 23f6655c2f5d867ad7199d34424056fde9990c20..1ea1a1bfec0cb50bd08c894a3f30d4ad9528320c 100644 (file)
@@ -3380,10 +3380,11 @@ TEST_F(HAServiceTest, processSynchronize4) {
     }
 
     // The following commands should have been sent to the server2: dhcp-disable,
-    // lease4-get-page and dhcp-enable.
+    // lease4-get-page and ha-sync-complete-notify.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease4-get-page",""));
-    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify", ""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("dhcp-enable", ""));
 }
 
 // This test verifies that the ha-sync command is processed successfully for the
@@ -3416,10 +3417,11 @@ TEST_F(HAServiceTest, processSynchronize4Authorized) {
     }
 
     // The following commands should have been sent to the server2: dhcp-disable,
-    // lease4-get-page and dhcp-enable.
+    // lease4-get-page and ha-sync-complete-notify.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease4-get-page",""));
-    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify", ""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("dhcp-enable", ""));
 }
 
 // This test verifies that an error is reported when sending a dhcp-disable
@@ -3437,10 +3439,11 @@ TEST_F(HAServiceTest, processSynchronizeDisableError) {
     ASSERT_TRUE(rsp);
     checkAnswer(rsp, CONTROL_RESULT_ERROR);
 
-    // The server2 should only receive dhcp-disable commands. Remaining two should
+    // The server2 should only receive dhcp-disable command. Remaining three should
     // not be sent.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("lease4-get-page",""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
     EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
 }
 
@@ -3482,6 +3485,7 @@ TEST_F(HAServiceTest, processSynchronizeLease4GetPageError) {
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease4-get-page",""));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
 }
 
 // This test verifies that an error is reported when sending a dhcp-enable
@@ -3491,6 +3495,11 @@ TEST_F(HAServiceTest, processSynchronizeEnableError) {
     factory2_->getResponseCreator()->setControlResult("dhcp-enable",
                                                       CONTROL_RESULT_ERROR);
 
+    // Return the unsupported command status for this command to enforce
+    // sending the dhcp-enable command.
+    factory2_->getResponseCreator()->setControlResult("ha-sync-complete-notify",
+                                                      CONTROL_RESULT_COMMAND_UNSUPPORTED);
+
     // Run HAService::processSynchronize and gather a response.
     ConstElementPtr rsp;
     runProcessSynchronize4(rsp);
@@ -3499,12 +3508,35 @@ TEST_F(HAServiceTest, processSynchronizeEnableError) {
     ASSERT_TRUE(rsp);
     checkAnswer(rsp, CONTROL_RESULT_ERROR);
 
-    // The server2 should receive all commands.
+    // The server2 should receive four commands of which ha-sync-complete-notify
+    // was unsupported.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease4-get-page",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
 }
 
+// This test verifies that the dhcp-enable command is not sent to the partner
+// when the partner returns an error to the ha-sync-complete-notify command.
+TEST_F(HAServiceTest, processSynchronizeNotifyError) {
+    // Make server2 return an error to the ha-sync-complete-notify command.
+    factory2_->getResponseCreator()->setControlResult("ha-sync-complete-notify",
+                                                      CONTROL_RESULT_ERROR);
+
+    // Run HAService::processSynchronize and gather a response.
+    ConstElementPtr rsp;
+    runProcessSynchronize4(rsp);
+
+    // The response should indicate an error.
+    ASSERT_TRUE(rsp);
+    checkAnswer(rsp, CONTROL_RESULT_ERROR);
+
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease4-get-page",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+}
+
 // This test verifies that the ha-sync command is processed successfully for the
 // DHCPv6 server.
 TEST_F(HAServiceTest, processSynchronize6) {
@@ -3527,10 +3559,10 @@ TEST_F(HAServiceTest, processSynchronize6) {
     }
 
     // The following commands should have been sent to the server2: dhcp-disable,
-    // lease6-get-page and dhcp-enable.
+    // lease6-get-page and ha-sync-complete-notify.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease6-get-page",""));
-    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
 }
 
 // This test verifies that the ha-sync command is processed successfully for the
@@ -3564,10 +3596,10 @@ TEST_F(HAServiceTest, processSynchronize6Authorized) {
     }
 
     // The following commands should have been sent to the server2: dhcp-disable,
-    // lease6-get-page and dhcp-enable.
+    // lease6-get-page and ha-sync-complete-notify.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease6-get-page",""));
-    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
 }
 
 // This test verifies that an error is reported when sending a dhcp-disable
@@ -3585,10 +3617,10 @@ TEST_F(HAServiceTest, processSynchronize6DisableError) {
     ASSERT_TRUE(rsp);
     checkAnswer(rsp, CONTROL_RESULT_ERROR);
 
-    // The server2 should only receive dhcp-disable commands. Remaining two should
-    // not be sent.
+    // The server2 should only receive the dhcp-disable command.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("lease6-get-page",""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
     EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
 }
 
@@ -3630,6 +3662,7 @@ TEST_F(HAServiceTest, processSynchronizeLease6GetPageError) {
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease6-get-page",""));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
 }
 
 // This test verifies that an error is reported when sending a dhcp-enable
@@ -3639,6 +3672,11 @@ TEST_F(HAServiceTest, processSynchronize6EnableError) {
     factory2_->getResponseCreator()->setControlResult("dhcp-enable",
                                                       CONTROL_RESULT_ERROR);
 
+    // Return the unsupported command status for this command to enforce
+    // sending the dhcp-enable command.
+    factory2_->getResponseCreator()->setControlResult("ha-sync-complete-notify",
+                                                      CONTROL_RESULT_COMMAND_UNSUPPORTED);
+
     // Run HAService::processSynchronize and gather a response.
     ConstElementPtr rsp;
     runProcessSynchronize6(rsp);
@@ -3647,12 +3685,35 @@ TEST_F(HAServiceTest, processSynchronize6EnableError) {
     ASSERT_TRUE(rsp);
     checkAnswer(rsp, CONTROL_RESULT_ERROR);
 
-    // The server2 should receive all commands.
+    // The server2 should receive four commands of which ha-sync-complete-notify
+    // was unsupported.
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease6-get-page",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
     EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
 }
 
+// This test verifies that the dhcp-enable command is not sent to the partner
+// when the partner returns an error to the ha-sync-complete-notify command.
+TEST_F(HAServiceTest, processSynchronize6NotifyError) {
+    // Make server2 return an error to the ha-sync-complete-notify command.
+    factory2_->getResponseCreator()->setControlResult("ha-sync-complete-notify",
+                                                      CONTROL_RESULT_ERROR);
+
+    // Run HAService::processSynchronize and gather a response.
+    ConstElementPtr rsp;
+    runProcessSynchronize6(rsp);
+
+    // The response should indicate an error.
+    ASSERT_TRUE(rsp);
+    checkAnswer(rsp, CONTROL_RESULT_ERROR);
+
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("dhcp-disable","20"));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease6-get-page",""));
+    EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-sync-complete-notify",""));
+    EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("dhcp-enable",""));
+}
+
 // This test verifies that the DHCPv4 service can be disabled on the remote server.
 TEST_F(HAServiceTest, asyncDisableDHCPService4) {
     // Create HA configuration.
@@ -3672,7 +3733,8 @@ TEST_F(HAServiceTest, asyncDisableDHCPService4) {
     // When the transaction is finished, the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncDisableDHCPService("server3", 10,
                                                     [this](const bool success,
-                                                           const std::string& error_message) {
+                                                           const std::string& error_message,
+                                                           const int) {
         EXPECT_TRUE(success);
         EXPECT_TRUE(error_message.empty());
         io_service_->stop();
@@ -3717,7 +3779,8 @@ TEST_F(HAServiceTest, asyncDisableDHCPService4Authorized) {
     // When the transaction is finished, the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncDisableDHCPService("server3", 10,
                                                     [this](const bool success,
-                                                           const std::string& error_message) {
+                                                           const std::string& error_message,
+                                                           const int) {
         EXPECT_TRUE(success);
         EXPECT_TRUE(error_message.empty());
         io_service_->stop();
@@ -3745,7 +3808,8 @@ TEST_F(HAServiceTest, asyncDisableDHCPService4ServerOffline) {
     // When the transaction is finished, the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncDisableDHCPService("server2", 10,
                                                     [this](const bool success,
-                                                           const std::string& error_message) {
+                                                           const std::string& error_message,
+                                                           const int) {
         EXPECT_FALSE(success);
         EXPECT_FALSE(error_message.empty());
         io_service_->stop();
@@ -3779,7 +3843,8 @@ TEST_F(HAServiceTest, asyncDisableDHCPService4ControlResultError) {
     // When the transaction is finished, the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncDisableDHCPService("server3", 10,
                                                     [this](const bool success,
-                                                           const std::string& error_message) {
+                                                           const std::string& error_message,
+                                                           const int) {
         EXPECT_FALSE(success);
         EXPECT_FALSE(error_message.empty());
         io_service_->stop();
@@ -3812,7 +3877,8 @@ TEST_F(HAServiceTest, asyncDisableDHCPService4ControlResultUnauthorized) {
     // When the transaction is finished, the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncDisableDHCPService("server3", 10,
                                                     [this](const bool success,
-                                                           const std::string& error_message) {
+                                                           const std::string& error_message,
+                                                           const int) {
         EXPECT_FALSE(success);
         EXPECT_FALSE(error_message.empty());
         io_service_->stop();
@@ -3841,7 +3907,8 @@ TEST_F(HAServiceTest, asyncEnableDHCPService4) {
     // the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncEnableDHCPService("server2",
                                                    [this](const bool success,
-                                                          const std::string& error_message) {
+                                                          const std::string& error_message,
+                                                          const int) {
         EXPECT_TRUE(success);
         EXPECT_TRUE(error_message.empty());
         io_service_->stop();
@@ -3885,7 +3952,8 @@ TEST_F(HAServiceTest, asyncEnableDHCPService4Authorized) {
     // the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncEnableDHCPService("server2",
                                                    [this](const bool success,
-                                                          const std::string& error_message) {
+                                                          const std::string& error_message,
+                                                          const int) {
         EXPECT_TRUE(success);
         EXPECT_TRUE(error_message.empty());
         io_service_->stop();
@@ -3912,7 +3980,8 @@ TEST_F(HAServiceTest, asyncEnableDHCPService4ServerOffline) {
     // the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncEnableDHCPService("server2",
                                                    [this](const bool success,
-                                                          const std::string& error_message) {
+                                                          const std::string& error_message,
+                                                          const int) {
         EXPECT_FALSE(success);
         EXPECT_FALSE(error_message.empty());
         io_service_->stop();
@@ -3946,7 +4015,8 @@ TEST_F(HAServiceTest, asyncEnableDHCPService4ControlResultError) {
     // the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncEnableDHCPService("server2",
                                                    [this](const bool success,
-                                                          const std::string& error_message) {
+                                                          const std::string& error_message,
+                                                          const int) {
         EXPECT_FALSE(success);
         EXPECT_FALSE(error_message.empty());
         io_service_->stop();
@@ -3979,7 +4049,8 @@ TEST_F(HAServiceTest, asyncEnableDHCPService4ControlResultUnauthorized) {
     // the IO service gets stopped.
     ASSERT_NO_THROW(service.asyncEnableDHCPService("server2",
                                                    [this](const bool success,
-                                                          const std::string& error_message) {
+                                                          const std::string& error_message,
+                                                          const int) {
         EXPECT_FALSE(success);
         EXPECT_FALSE(error_message.empty());
         io_service_->stop();
@@ -4627,7 +4698,7 @@ TEST_F(HAServiceTest, processHAResetWaiting) {
     HAConfigPtr config_storage = createValidConfiguration();
     TestHAService service(io_service_, network_state_, config_storage);
 
-    // Transition the server to the load-balancing state.
+    // Transition the server to the waiting state.
     EXPECT_NO_THROW(service.transition(HA_WAITING_ST, HAService::NOP_EVT));
 
     // Process ha-reset command that should not change the state of the
@@ -4647,6 +4718,76 @@ TEST_F(HAServiceTest, processHAResetWaiting) {
     EXPECT_EQ(HA_WAITING_ST, service.getCurrState());
 }
 
+// This test verifies that the ha-sync-complete-notify command is processed
+// successfully: in the partner-down state the DHCP service stays disabled,
+// whereas in another state (here load-balancing) the service gets enabled.
+TEST_F(HAServiceTest, processSyncCompleteNotify) {
+    HAConfigPtr config_storage = createValidConfiguration();
+    TestHAService service(io_service_, network_state_, config_storage);
+
+    // Transition the server to the partner-down state.
+    EXPECT_NO_THROW(service.transition(HA_PARTNER_DOWN_ST, HAService::NOP_EVT));
+
+    // Simulate disabling the DHCP service for synchronization.
+    EXPECT_NO_THROW(service.network_state_->disableService(NetworkState::Origin::HA_COMMAND));
+
+    ConstElementPtr rsp;
+    EXPECT_NO_THROW(rsp = service.processSyncCompleteNotify());
+
+    ASSERT_TRUE(rsp);
+    checkAnswer(rsp, CONTROL_RESULT_SUCCESS,
+                "Server successfully notified about the synchronization completion.");
+
+    // The server should remain in the partner-down state.
+    EXPECT_EQ(HA_PARTNER_DOWN_ST, service.getCurrState());
+
+    // The service should be disabled until the server transitions to the
+    // normal state.
+    EXPECT_FALSE(service.network_state_->isServiceEnabled());
+
+    factory2_->getResponseCreator()->setControlResult(CONTROL_RESULT_ERROR);
+
+    EXPECT_NO_THROW(rsp = service.processSyncCompleteNotify());
+
+    ASSERT_TRUE(rsp);
+    checkAnswer(rsp, CONTROL_RESULT_SUCCESS,
+                "Server successfully notified about the synchronization completion.");
+
+    // The server should still remain in the partner-down state.
+    EXPECT_EQ(HA_PARTNER_DOWN_ST, service.getCurrState());
+
+    // The service should be disabled to avoid allocating new leases before
+    // the server transitions to the normal state.
+    EXPECT_FALSE(service.network_state_->isServiceEnabled());
+
+    // It is possible that the connection from this server to the partner
+    // is still broken. In that case, the HA_SYNCED_PARTNER_UNAVAILABLE_EVT
+    // is emitted and the server should enable the DHCP service to continue
+    // serving the clients in the partner-down state.
+    EXPECT_NO_THROW(service.postNextEvent(HAService::HA_SYNCED_PARTNER_UNAVAILABLE_EVT));
+    EXPECT_NO_THROW(service.runModel(HAService::NOP_EVT));
+    EXPECT_TRUE(service.network_state_->isServiceEnabled());
+
+    // Transition the server to the load-balancing state.
+    EXPECT_NO_THROW(service.transition(HA_LOAD_BALANCING_ST, HAService::NOP_EVT));
+
+    // Simulate disabling the DHCP service for synchronization again.
+    EXPECT_NO_THROW(service.network_state_->disableService(NetworkState::Origin::HA_COMMAND));
+
+    EXPECT_NO_THROW(rsp = service.processSyncCompleteNotify());
+
+    ASSERT_TRUE(rsp);
+    checkAnswer(rsp, CONTROL_RESULT_SUCCESS,
+                "Server successfully notified about the synchronization completion.");
+
+    // The server should remain in the load-balancing state.
+    EXPECT_EQ(HA_LOAD_BALANCING_ST, service.getCurrState());
+
+    // This time the service should be enabled because the server is in
+    // the state in which it sends the lease updates.
+    EXPECT_TRUE(service.network_state_->isServiceEnabled());
+}
+
 /// @brief HA partner to the server under test.
 ///
 /// This is a wrapper class around @c HttpListener which simulates a