[#1402] Send ha-reset to partner

author Marcin Siodelski <marcin@isc.org>

Tue, 12 Jan 2021 09:39:04 +0000 (10:39 +0100)

committer Marcin Siodelski <marcin@isc.org>

Wed, 13 Jan 2021 09:12:32 +0000 (10:12 +0100)
author Marcin Siodelski <marcin@isc.org>
Tue, 12 Jan 2021 09:39:04 +0000 (10:39 +0100)
committer Marcin Siodelski <marcin@isc.org>
Wed, 13 Jan 2021 09:12:32 +0000 (10:12 +0100)
diff --git a/src/hooks/dhcp/high_availability/ha_messages.cc b/src/hooks/dhcp/high_availability/ha_messages.cc

index 8b96d81ed7d836cefaab4b6ea33a82d695f5a875..00a6de805c63aed0e69d6d0cbe55f3a181d4bcc2 100644 (file)
--- a/src/hooks/dhcp/high_availability/ha_messages.cc
+++ b/src/hooks/dhcp/high_availability/ha_messages.cc
@@ -82,6 +82,8 @@ extern const isc::log::MessageID HA_MAINTENANCE_STARTED = "HA_MAINTENANCE_STARTE
  extern const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN = "HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN";
  extern const isc::log::MessageID HA_MAINTENANCE_START_HANDLER_FAILED = "HA_MAINTENANCE_START_HANDLER_FAILED";
  extern const isc::log::MessageID HA_MISSING_CONFIGURATION = "HA_MISSING_CONFIGURATION";
+extern const isc::log::MessageID HA_RESET_COMMUNICATIONS_FAILED = "HA_RESET_COMMUNICATIONS_FAILED";
+extern const isc::log::MessageID HA_RESET_FAILED = "HA_RESET_FAILED";
  extern const isc::log::MessageID HA_RESET_HANDLER_FAILED = "HA_RESET_HANDLER_FAILED";
  extern const isc::log::MessageID HA_SCOPES_HANDLER_FAILED = "HA_SCOPES_HANDLER_FAILED";
  extern const isc::log::MessageID HA_SERVICE_STARTED = "HA_SERVICE_STARTED";
@@ -177,6 +179,8 @@ const char* values[] = {
      "HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN", "the server is now in the partner-down mode as a result of requested maintenance",
      "HA_MAINTENANCE_START_HANDLER_FAILED", "ha-maintenance-start command failed: %1",
      "HA_MISSING_CONFIGURATION", "high-availability parameter not specified for High Availability hooks library",
+    "HA_RESET_COMMUNICATIONS_FAILED", "failed to send ha-reset command to %1: %2",
+    "HA_RESET_FAILED", "failed to reset HA state machine of %1: %2",
      "HA_RESET_HANDLER_FAILED", "ha-reset command failed: %1",
      "HA_SCOPES_HANDLER_FAILED", "ha-scopes command failed: %1",
      "HA_SERVICE_STARTED", "started high availability service in %1 mode as %2 server",
diff --git a/src/hooks/dhcp/high_availability/ha_messages.h b/src/hooks/dhcp/high_availability/ha_messages.h

index 600bc1b453898374baddf99a2e35382a3c2b0029..0f5d1fd951f3cf319c1b037b7dac4e1bed6d827a 100644 (file)
--- a/src/hooks/dhcp/high_availability/ha_messages.h
+++ b/src/hooks/dhcp/high_availability/ha_messages.h
@@ -83,6 +83,8 @@ extern const isc::log::MessageID HA_MAINTENANCE_STARTED;
  extern const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN;
  extern const isc::log::MessageID HA_MAINTENANCE_START_HANDLER_FAILED;
  extern const isc::log::MessageID HA_MISSING_CONFIGURATION;
+extern const isc::log::MessageID HA_RESET_COMMUNICATIONS_FAILED;
+extern const isc::log::MessageID HA_RESET_FAILED;
  extern const isc::log::MessageID HA_RESET_HANDLER_FAILED;
  extern const isc::log::MessageID HA_SCOPES_HANDLER_FAILED;
  extern const isc::log::MessageID HA_SERVICE_STARTED;
diff --git a/src/hooks/dhcp/high_availability/ha_messages.mes b/src/hooks/dhcp/high_availability/ha_messages.mes

index c9e0240b7130ecfee9f8d98cc313bf73673f8755..484c4315426bd96bd535379148c0c5b6bdfe0bd4 100644 (file)
--- a/src/hooks/dhcp/high_availability/ha_messages.mes
+++ b/src/hooks/dhcp/high_availability/ha_messages.mes
@@ -470,6 +470,16 @@ This error message is issued to indicate that the configuration for the
  High Availability hooks library hasn't been specified. The 'high-availability'
  parameter must be specified for the hooks library to load properly.
  
+% HA_RESET_COMMUNICATIONS_FAILED failed to send ha-reset command to %1: %2
+This warning message indicates a problem with communication with an HA peer
+while sending the ha-reset command. The first argument specifies a remote
+server name. The second argument specifies a reason for failure.
+
+% HA_RESET_FAILED failed to reset HA state machine of %1: %2
+This warning message indicates that a peer returned an error status code
+in response to the ha-reset command. The first argument specifies a
+remote server name. The second argument specifies a reason for failure.
+
  % HA_RESET_HANDLER_FAILED ha-reset command failed: %1
  This error message is issued to indicate that the ha-reset command handler
  failed while processing the command. The argument provides the reason for
diff --git a/src/hooks/dhcp/high_availability/ha_service.cc b/src/hooks/dhcp/high_availability/ha_service.cc

index c1c87f7e434b346b4fc0a5efaceb28a719ba4b3b..dca4280316d410e19086ffe2faeeee43cf241c6f 100644 (file)
--- a/src/hooks/dhcp/high_availability/ha_service.cc
+++ b/src/hooks/dhcp/high_availability/ha_service.cc
@@ -219,15 +219,24 @@ HAService::communicationRecoveryHandler() {
          case HA_WAITING_ST:
          case HA_SYNCING_ST:
          case HA_READY_ST:
-            // The partner seems to be waking up. Let's wait for it to get to
-            // the load-balancing state before we transition to the load-balancing
-            // state.
-            postNextEvent(NOP_EVT);
+            // The partner seems to be waking up, perhaps after communication-recovery.
+            // If our backlog queue is overflown we need to synchronize our lease database.
+            // There is no need to send ha-reset to the partner because the partner is
+            // already synchronizing its lease database.
+            if (!communication_state_->isCommunicationInterrupted() &&
+                lease_update_backlog_.wasOverflown()) {
+                verboseTransition(HA_WAITING_ST);
+            } else {
+                // Backlog was not overflown, so there is no need to synchronize our
+                // lease database. Let's wait until our partner completes synchronization
+                // and transitions to the load-balancing state.
+                postNextEvent(NOP_EVT);
+            }
              break;
  
          default:
-            // If the communication is still interrupted let's continue sitting
-            // in this state until it is resumed or until transition to the
+            // If the communication is still interrupted, let's continue sitting
+            // in this state until it is resumed or until the transition to the
              // partner-down state, depending on what happens first.
              if (communication_state_->isCommunicationInterrupted()) {
                  postNextEvent(NOP_EVT);
@@ -236,14 +245,24 @@ HAService::communicationRecoveryHandler() {
  
              // The communication has been resumed. The partner server must be in a state
              // in which it can receive outstanding lease updates we collected. The number of
-            // oustanding lease updates must not exceed the configured limit. Finally, the
-            // lease updates must be successfully send. If that all works, we will transition
+            // outstanding lease updates must not exceed the configured limit. Finally, the
+            // lease updates must be successfully sent. If that all works, we will transition
              // to the normal operation.
-            if ((communication_state_->getPartnerState() == getNormalState() ||
-                 (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) &&
-                !lease_update_backlog_.wasOverflown() &&
-                sendLeaseUpdatesFromBacklog()) {
-                // Everything went fine, so we can go back to the normal operation.
+            if ((communication_state_->getPartnerState() == getNormalState()) ||
+                (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) {
+                if (lease_update_backlog_.wasOverflown() || !sendLeaseUpdatesFromBacklog()) {
+                    // If our lease backlog was overflown or we were unable to send lease
+                    // updates to the partner we should notify the partner that it should
+                    // synchronize the lease database. We do it by sending ha-reset command.
+                    if (sendHAReset()) {
+                        verboseTransition(HA_WAITING_ST);
+                    }
+                    break;
+                }
+                // The backlog was not overflown and we successfully sent our lease updates.
+                // We can now transition to the normal operation state. If the partner
+                // fails to send his outstanding lease updates to us it should send the
+                // ha-reset command to us.
                  verboseTransition(getNormalState());
                  break;
              }
@@ -2251,6 +2270,74 @@ HAService::sendLeaseUpdatesFromBacklog() {
      return (updates_successful);
  }
  
+void
+HAService::asyncSendHAReset(HttpClient& http_client,
+                            const HAConfig::PeerConfigPtr& config,
+                            PostRequestCallback post_request_action) {
+    ConstElementPtr command = CommandCreator::createHAReset(server_type_);
+
+    // Create HTTP/1.1 request including our command.
+    PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
+        (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
+         HostHttpHeader(config->getUrl().getHostname()));
+    config->addBasicAuthHttpHeader(request);
+    request->setBodyAsJson(command);
+    request->finalize();
+
+    // Response object should also be created because the HTTP client needs
+    // to know the type of the expected response.
+    HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
+
+    http_client.asyncSendRequest(config->getUrl(), request, response,
+        [this, config, post_request_action]
+            (const boost::system::error_code& ec,
+             const HttpResponsePtr& response,
+             const std::string& error_str) {
+
+             std::string error_message;
+
+             if (ec || !error_str.empty()) {
+                 error_message = (ec ? ec.message() : error_str);
+                 LOG_WARN(ha_logger, HA_RESET_COMMUNICATIONS_FAILED)
+                     .arg(config->getLogLabel())
+                     .arg(ec ? ec.message() : error_str);
+
+             } else {
+                 // Handle third group of errors.
+                 try {
+                    int rcode = 0;
+                    auto args = verifyAsyncResponse(response, rcode);
+                 } catch (const std::exception& ex) {
+                     error_message = ex.what();
+                     LOG_WARN(ha_logger, HA_RESET_FAILED)
+                         .arg(config->getLogLabel())
+                         .arg(ex.what());
+                 }
+             }
+
+             post_request_action(error_message.empty(), error_message);
+   });
+}
+
+bool
+HAService::sendHAReset() {
+    IOService io_service;
+    HttpClient client(io_service);
+    auto remote_config = config_->getFailoverPeerConfig();
+    bool reset_successful = true;
+
+    asyncSendHAReset(client, remote_config,
+                     [&](const bool success, const std::string&) {
+        io_service.stop();
+        reset_successful = success;
+    });
+
+    // Run the IO service until it is stopped by the callback. This makes it synchronous.
+    io_service.run();
+
+    return (reset_successful);
+}
+
  ConstElementPtr
  HAService::processScopes(const std::vector<std::string>& scopes) {
      try {
diff --git a/src/hooks/dhcp/high_availability/ha_service.h b/src/hooks/dhcp/high_availability/ha_service.h

index 9cd153544594861dcee578474bb549f6639a054b..2740bc8271fc5b86e94a5b38a8a5fd50b3699597 100644 (file)
--- a/src/hooks/dhcp/high_availability/ha_service.h
+++ b/src/hooks/dhcp/high_availability/ha_service.h
@@ -896,6 +896,30 @@ protected:
      /// successfully (when true) or unsuccessfully (when false).
      bool sendLeaseUpdatesFromBacklog();
  
+    /// @brief Sends ha-reset command to partner asynchronously.
+    ///
+    /// @param http_client reference to the HTTP client to be used for communication.
+    /// @param remote_config pointer to the remote server's configuration.
+    /// @param post_request_action callback to be invoked when the operation
+    /// completes, with a success flag and an error message (empty on success).
+    void asyncSendHAReset(http::HttpClient& http_client,
+                          const HAConfig::PeerConfigPtr& remote_config,
+                          PostRequestCallback post_request_action);
+
+    /// @brief Sends ha-reset command to partner synchronously.
+    ///
+    /// This method attempts to send ha-reset command to the active partner
+    /// synchronously. It may be invoked when the communication with the partner
+    /// is re-established after temporary failure. It causes the partner to
+    /// transition to the waiting state, which effectively means that the
+    /// partner will synchronize its lease database with this server.
+    ///
+    /// This method creates its own instances of the HttpClient and IOService and
+    /// invokes IOService::run().
+    ///
+    /// @return true if the command was sent and no error was reported, false otherwise.
+    bool sendHAReset();
+
  public:
  
      /// @brief Processes ha-scopes command and returns a response.
diff --git a/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc b/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc

index dbfe177045c7afeb71828856503507dfb5dc772d..961595a08d5c3a012139da8c0e62e852e48b87a1 100644 (file)
--- a/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc
+++ b/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc
@@ -769,6 +769,7 @@ public:
          // server 1.
          HAConfigPtr config_storage = createValidConfiguration();
          config_storage->setWaitBackupAck(wait_backup_ack);
+        config_storage->setDelayedUpdatesLimit(10);
          setBasicAuth(config_storage);
  
          // Create parking lot where query is going to be parked and unparked.
@@ -871,6 +872,7 @@ public:
          // Create HA configuration for 3 servers. This server is
          // server 1.
          HAConfigPtr config_storage = createValidConfiguration();
+        config_storage->setDelayedUpdatesLimit(10);
          config_storage->setWaitBackupAck(wait_backup_ack);
          setBasicAuth(config_storage);
  
@@ -1111,12 +1113,26 @@ public:
          EXPECT_EQ(0, service_->lease_update_backlog_.size());
      }
  
-    /// @brief Tests that a DHCPv4 server trying to recover from the communication
-    /// interruption transitions to the waiting state if the partner refuses delayed
-    /// lease updates.
-    void testSendUpdatesCommunicationRecoveryFailed() {
-        // Simulate that the partner returns an error.
-        factory2_->getResponseCreator()->setControlResult(CONTROL_RESULT_ERROR);
+    /// @brief Tests the cases when the server is trying to recover from the
+    /// communication interruption and sending lease updates and/or ha-reset fails.
+    ///
+    /// @param partner_state partner state when communication is re-established.
+    /// @param lease_update_result control result returned to lease updates.
+    /// @param ha_reset_result control result returned to ha-reset command.
+    /// @param overflow boolean value indicating if this test should verify the
+    /// case when the leases backlog is overflown (when true), or not (when
+    /// false).
+    void testSendUpdatesCommunicationRecoveryFailed(const std::string& partner_state,
+                                                    const int lease_update_result,
+                                                    const int ha_reset_result,
+                                                    const bool overflow = false) {
+        // Partner responds with a specified control result to lease updates.
+        factory2_->getResponseCreator()->setControlResult("lease4-update",
+                                                          lease_update_result);
+        factory2_->getResponseCreator()->setControlResult("lease4-del",
+                                                          lease_update_result);
+        // Partner returns specified control result to ha-reset.
+        factory2_->getResponseCreator()->setControlResult("ha-reset", ha_reset_result);
  
          // This flag will be set to true if unpark is called.
          bool unpark_called = false;
@@ -1130,9 +1146,18 @@ public:
          // Let's make sure they have been queued.
          EXPECT_EQ(2, service_->lease_update_backlog_.size());
  
+        // When testing the case when the backlog should be overflown, we need
+        // to add several more leases to the backlog to exceed the limit.
+        if (overflow) {
+            ASSERT_NO_THROW(generateTestLeases4());
+            for (auto lease : leases4_) {
+                service_->lease_update_backlog_.push(LeaseUpdateBacklog::ADD, lease);
+            }
+        }
+
          // Make partner available.
          service_->communication_state_->poke();
-        service_->communication_state_->setPartnerState("load-balancing");
+        service_->communication_state_->setPartnerState(partner_state);
  
          // Start HTTP servers.
          ASSERT_NO_THROW({
@@ -1142,21 +1167,45 @@ public:
          });
  
          // This should cause the server to attempt to send outstanding lease
-        // updates to the partner. The partner reports an error so that should
-        // cause this server to transition to the waiting state from which it
-        // will recover doing full lease database synchronization.
-        testSynchronousCommands([this]() {
+        // updates to the partner.
+        testSynchronousCommands([this, ha_reset_result]() {
              service_->runModel(HAService::NOP_EVT);
-            EXPECT_EQ(HA_WAITING_ST, service_->getCurrState());
+            // If the ha-reset returns success the server should transition to the
+            // waiting state and begin synchronization. Otherwise, if the ha-reset
+            // fails the server should wait in the communication-recovery state
+            // until it succeeds.
+            if (ha_reset_result == CONTROL_RESULT_SUCCESS) {
+                EXPECT_EQ(HA_WAITING_ST, service_->getCurrState());
+            } else {
+                EXPECT_EQ(HA_COMMUNICATION_RECOVERY_ST, service_->getCurrState());
+            }
          });
  
-        // Deletions are scheduled first and this should cause the failure.
-        EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease4-del",
-                                                                 "192.2.3.4"));
-        // This lease update should not be sent because the first update
-        // triggered an error.
-        EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("lease4-update",
-                                                                 "192.1.2.3"));
+        // The server will only send lease updates if it is not overflown. If
+        // it is overflown, it will rather transition to the waiting state to
+        // initiate full synchronization.
+        if (!overflow) {
+            // Deletions are scheduled first and this should cause the failure.
+            EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease4-del",
+                                                                     "192.2.3.4"));
+            // This lease update should not be sent because the first update
+            // triggered an error.
+            EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("lease4-update",
+                                                                      "192.1.2.3"));
+        }
+
+        if ((partner_state == "load-balancing") || (partner_state == "communication-recovery")) {
+            // The lease updates failed and the partner remains in the load-balancing or
+            // communication-recovery state, so the server should send ha-reset to the
+            // partner to cause it to transition to the waiting state and synchronize
+            // the lease database.
+            EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-reset", ""));
+        } else {
+            // The lease updates failed but the partner is already synchronizing lease
+            // database. In this case, don't send the ha-reset.
+            EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("ha-reset", ""));
+        }
+
          // The backlog should be empty.
          EXPECT_EQ(0, service_->lease_update_backlog_.size());
      }
@@ -1434,12 +1483,25 @@ public:
          EXPECT_EQ(0, service_->lease_update_backlog_.size());
      }
  
-    /// @brief Tests that a DHCPv6 server trying to recover from the communication
-    /// interruption transitions to the waiting state if the partner refuses delayed
-    /// lease updates.
-    void testSendUpdatesCommunicationRecovery6Failed() {
-        // Simulate that the partner returns an error.
-        factory2_->getResponseCreator()->setControlResult(CONTROL_RESULT_ERROR);
+    /// @brief Tests the cases when the server is trying to recover from the
+    /// communication interruption and sending lease updates and/or ha-reset fails.
+    ///
+    /// @param partner_state partner state when communication is re-established.
+    /// @param lease_update_result control result returned to lease updates.
+    /// @param ha_reset_result control result returned to ha-reset command.
+    /// @param overflow boolean value indicating if this test should verify the
+    /// case when the leases backlog is overflown (when true), or not (when
+    /// false).
+    void testSendUpdatesCommunicationRecovery6Failed(const std::string& partner_state,
+                                                     const int lease_update_result,
+                                                     const int ha_reset_result,
+                                                     const bool overflow = false) {
+        // Partner responds with a specified control result to lease updates.
+        factory2_->getResponseCreator()->setControlResult("lease6-bulk-apply",
+                                                          lease_update_result);
+        // Partner returns specified control result to ha-reset.
+        factory2_->getResponseCreator()->setControlResult("ha-reset",
+                                                          ha_reset_result);
  
          // This flag will be set to true if unpark is called.
          bool unpark_called = false;
@@ -1453,9 +1515,18 @@ public:
          // Let's make sure they have been queued.
          EXPECT_EQ(2, service_->lease_update_backlog_.size());
  
+        // When testing the case when the backlog should be overflown, we need
+        // to add several more leases to the backlog to exceed the limit.
+        if (overflow) {
+            ASSERT_NO_THROW(generateTestLeases6());
+            for (auto lease : leases6_) {
+                service_->lease_update_backlog_.push(LeaseUpdateBacklog::ADD, lease);
+            }
+        }
+
          // Make partner available.
          service_->communication_state_->poke();
-        service_->communication_state_->setPartnerState("load-balancing");
+        service_->communication_state_->setPartnerState(partner_state);
  
          // Start HTTP servers.
          ASSERT_NO_THROW({
@@ -1465,18 +1536,41 @@ public:
          });
  
          // This should cause the server to attempt to send outstanding lease
-        // updates to the partner. The partner reports an error so that should
-        // cause this server to transition to the waiting state from which it
-        // will recover doing full lease database synchronization.
-        testSynchronousCommands([this]() {
+        // updates to the partner.
+        testSynchronousCommands([this, ha_reset_result]() {
              service_->runModel(HAService::NOP_EVT);
-            EXPECT_EQ(HA_WAITING_ST, service_->getCurrState());
+            // If the ha-reset returns success the server should transition to the
+            // waiting state and begin synchronization. Otherwise, if the ha-reset
+            // fails the server should wait in the communication-recovery state
+            // until it succeeds.
+            if (ha_reset_result == CONTROL_RESULT_SUCCESS) {
+                EXPECT_EQ(HA_WAITING_ST, service_->getCurrState());
+            } else {
+                EXPECT_EQ(HA_COMMUNICATION_RECOVERY_ST, service_->getCurrState());
+            }
          });
  
-        // The server should have sent lease updates in a single command.
-        EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease6-bulk-apply",
-                                                                 "2001:db8:1::cafe",
-                                                                 "2001:db8:1::efac"));
+        // The server will only send lease updates if it is not overflown. If
+        // it is overflown, it will rather transition to the waiting state to
+        // initiate full synchronization.
+        if (!overflow) {
+            // The server should have sent lease updates in a single command.
+            EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("lease6-bulk-apply",
+                                                                     "2001:db8:1::cafe",
+                                                                     "2001:db8:1::efac"));
+        }
+
+        if ((partner_state == "load-balancing") || (partner_state == "communication-recovery")) {
+            // The lease updates failed and the partner remains in the load-balancing or
+            // communication-recovery state, so the server should send ha-reset to the
+            // partner to cause it to transition to the waiting state and synchronize
+            // the lease database.
+            EXPECT_TRUE(factory2_->getResponseCreator()->findRequest("ha-reset", ""));
+        } else {
+            // The lease updates failed but the partner is already synchronizing lease
+            // database. In this case, don't send the ha-reset.
+            EXPECT_FALSE(factory2_->getResponseCreator()->findRequest("ha-reset", ""));
+        }
  
          // Backlog should be empty.
          EXPECT_EQ(0, service_->lease_update_backlog_.size());
@@ -1999,16 +2093,62 @@ TEST_F(HAServiceTest, sendUpdatesCommunicationRecoveryMultiThreading) {
  }
  
  // Test scenario when lease updates are queued in the communication-recovery
-// state for later send.
-TEST_F(HAServiceTest, sendUpdatesCommunicationRecoveryFailed) {
-    testSendUpdatesCommunicationRecoveryFailed();
+// state and sending them later is unsuccessful. Partner is in load-balancing
+// state when the communication is re-established, so the test expects that the
+// ha-reset command is sent to the partner.
+TEST_F(HAServiceTest, communicationRecoveryFailedPartnerLoadBalancing) {
+    testSendUpdatesCommunicationRecoveryFailed("load-balancing", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_SUCCESS);
  }
  
  // Test scenario when lease updates are queued in the communication-recovery
-// state for later send. Multi threading case.
-TEST_F(HAServiceTest, sendUpdatesCommunicationRecoveryFailedMultiThreading) {
+// state and sending them later is unsuccessful. Partner is in load-balancing
+// state when the communication is re-established, so the test expects that the
+// ha-reset command is sent to the partner.
+TEST_F(HAServiceTest, communicationRecoveryFailedPartnerLoadBalancingMultiThreading) {
+    MultiThreadingMgr::instance().setMode(true);
+    testSendUpdatesCommunicationRecoveryFailed("load-balancing", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_SUCCESS);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later fails. The partner server is in the load-balancing
+// state but sending ha-reset fails. The server should remain in the
+// communication-recovery state.
+TEST_F(HAServiceTest, communicationRecoveryFailedResetFailed) {
+    testSendUpdatesCommunicationRecoveryFailed("load-balancing", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_ERROR);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later fails. The partner server is in the load-balancing
+// state but sending ha-reset fails. The server should remain in the
+// communication-recovery state.
+TEST_F(HAServiceTest, communicationRecoveryFailedResetFailedMultiThreading) {
+    MultiThreadingMgr::instance().setMode(true);
+    testSendUpdatesCommunicationRecoveryFailed("load-balancing", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_ERROR);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later is unsuccessful. Partner is in ready state
+// when the communication is re-established, so the test expects that the
+// ha-reset command is NOT sent to the partner. The lease backlog is overflown,
+// so the server should transition to the waiting state.
+TEST_F(HAServiceTest, communicationRecoveryFailedPartnerReady) {
+    testSendUpdatesCommunicationRecoveryFailed("ready", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_SUCCESS, true);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later is unsuccessful. Partner is in ready state
+// when the communication is re-established, so the test expects that the
+// ha-reset command is NOT sent to the partner. The lease backlog is overflown,
+// so the server should transition to the waiting state.
+TEST_F(HAServiceTest, communicationRecoveryFailedPartnerReadyMultiThreading) {
      MultiThreadingMgr::instance().setMode(true);
-    testSendUpdatesCommunicationRecoveryFailed();
+    testSendUpdatesCommunicationRecoveryFailed("ready", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_SUCCESS, true);
  }
  
  // Test scenario when all lease updates are sent successfully.
@@ -2124,16 +2264,62 @@ TEST_F(HAServiceTest, sendUpdatesCommunicationRecovery6MultiThreading) {
  }
  
  // Test scenario when lease updates are queued in the communication-recovery
-// state for later send.
-TEST_F(HAServiceTest, sendUpdatesCommunicationRecovery6Failed) {
-    testSendUpdatesCommunicationRecovery6Failed();
+// state and sending them later is unsuccessful. Partner is in load-balancing
+// state when the communication is re-established, so the test expects that the
+// ha-reset command is sent to the partner.
+TEST_F(HAServiceTest, communicationRecoveryFailed6PartnerLoadBalancing) {
+    testSendUpdatesCommunicationRecovery6Failed("load-balancing", CONTROL_RESULT_ERROR,
+                                                CONTROL_RESULT_SUCCESS);
  }
  
  // Test scenario when lease updates are queued in the communication-recovery
-// state for later send. Multi threading case.
-TEST_F(HAServiceTest, sendUpdatesCommunicationRecovery6FailedMultiThreading) {
+// state and sending them later is unsuccessful. Partner is in load-balancing
+// state when the communication is re-established, so the test expects that the
+// ha-reset command is sent to the partner.
+TEST_F(HAServiceTest, communicationRecovery6FailedPartnerLoadBalancingMultiThreading) {
+    MultiThreadingMgr::instance().setMode(true);
+    testSendUpdatesCommunicationRecovery6Failed("load-balancing", CONTROL_RESULT_ERROR,
+                                                CONTROL_RESULT_SUCCESS);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later fails. The partner server is in the load-balancing
+// state but sending ha-reset fails. The server should remain in the
+// communication-recovery state.
+TEST_F(HAServiceTest, communicationRecovery6FailedResetFailed) {
+    testSendUpdatesCommunicationRecovery6Failed("load-balancing", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_ERROR);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later fails. The partner server is in the load-balancing
+// state but sending ha-reset fails. The server should remain in the
+// communication-recovery state.
+TEST_F(HAServiceTest, communicationRecovery6FailedResetFailedMultiThreading) {
+    MultiThreadingMgr::instance().setMode(true);
+    testSendUpdatesCommunicationRecovery6Failed("load-balancing", CONTROL_RESULT_ERROR,
+                                                CONTROL_RESULT_ERROR);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later is unsuccessful. Partner is in ready state
+// when the communication is re-established, so the test expects that the
+// ha-reset command is NOT sent to the partner. The lease backlog is overflown,
+// so the server should transition to the waiting state.
+TEST_F(HAServiceTest, communicationRecovery6FailedPartnerReady) {
+    testSendUpdatesCommunicationRecovery6Failed("ready", CONTROL_RESULT_ERROR,
+                                                CONTROL_RESULT_SUCCESS, true);
+}
+
+// Test scenario when lease updates are queued in the communication-recovery
+// state and sending them later is unsuccessful. Partner is in ready state
+// when the communication is re-established, so the test expects that the
+// ha-reset command is NOT sent to the partner. The lease backlog is overflown,
+// so the server should transition to the waiting state.
+TEST_F(HAServiceTest, communicationRecovery6FailedPartnerReadyMultiThreading) {
      MultiThreadingMgr::instance().setMode(true);
-    testSendUpdatesCommunicationRecovery6Failed();
+    testSendUpdatesCommunicationRecovery6Failed("ready", CONTROL_RESULT_ERROR,
+                                               CONTROL_RESULT_SUCCESS, true);
  }
  
  // Test scenario when all lease updates are sent successfully.
author	Marcin Siodelski <marcin@isc.org>
	Tue, 12 Jan 2021 09:39:04 +0000 (10:39 +0100)
committer	Marcin Siodelski <marcin@isc.org>
	Wed, 13 Jan 2021 09:12:32 +0000 (10:12 +0100)
src/hooks/dhcp/high_availability/ha_messages.cc		patch \| blob \| blame \| history
src/hooks/dhcp/high_availability/ha_messages.h		patch \| blob \| blame \| history
src/hooks/dhcp/high_availability/ha_messages.mes		patch \| blob \| blame \| history
src/hooks/dhcp/high_availability/ha_service.cc		patch \| blob \| blame \| history
src/hooks/dhcp/high_availability/ha_service.h		patch \| blob \| blame \| history
src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc		patch \| blob \| blame \| history