]> git.ipfire.org Git - thirdparty/kea.git/commitdiff
[#2754] Fixed clock skew for partner unavailable
authorMarcin Siodelski <msiodelski@gmail.com>
Tue, 28 Mar 2023 09:31:13 +0000 (11:31 +0200)
committerMarcin Siodelski <msiodelski@gmail.com>
Wed, 29 Mar 2023 20:17:25 +0000 (22:17 +0200)
The clock skew is now reset when the partner is unavailable. We can't
reliably tell what the clock skew is when we have no communication with
the partner.

src/hooks/dhcp/high_availability/communication_state.cc
src/hooks/dhcp/high_availability/communication_state.h
src/hooks/dhcp/high_availability/ha_service.cc
src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc
src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc

index cd1a5f544963c6fc0463c1aca1926e5ee9c6aa3e..9514a0a24097377eb586f7055027fa82c22a7f3e 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC")
 //
 // This Source Code Form is subject to the terms of the Mozilla Public
 // License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -99,6 +99,18 @@ CommunicationState::setPartnerState(const std::string& state) {
     }
 }
 
+void
+CommunicationState::setPartnerUnavailable() {
+    if (MultiThreadingMgr::instance().getMode()) {
+        std::lock_guard<std::mutex> lk(*mutex_);
+        setPartnerStateInternal("unavailable");
+        resetPartnerTimeInternal();
+    } else {
+        setPartnerStateInternal("unavailable");
+        resetPartnerTimeInternal();
+    }
+}
+
 void
 CommunicationState::setPartnerStateInternal(const std::string& state) {
     try {
@@ -469,6 +481,14 @@ CommunicationState::setPartnerTimeInternal(const std::string& time_text) {
     clock_skew_ = partner_time_at_skew_ - my_time_at_skew_;
 }
 
+void
+CommunicationState::resetPartnerTimeInternal() {
+    clock_skew_ = boost::posix_time::time_duration(0, 0, 0, 0);
+    last_clock_skew_warn_ = boost::posix_time::ptime();
+    my_time_at_skew_ = boost::posix_time::ptime();
+    partner_time_at_skew_ = boost::posix_time::ptime();
+}
+
 std::string
 CommunicationState::logFormatClockSkew() const {
     if (MultiThreadingMgr::instance().getMode()) {
index a937025748941fffd0921e39ab174d1d73fc2781..2ca01db97d0c157d492c9e2bac4310f4a79ce386 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC")
 //
 // This Source Code Form is subject to the terms of the Mozilla Public
 // License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -106,6 +106,14 @@ public:
     /// @throw BadValue if unsupported state value was provided.
     void setPartnerState(const std::string& state);
 
+    /// @brief Sets partner state unavailable.
+    ///
+    /// This function should be called whenever there is a problem to
+    /// communicate with the partner. Besides setting the state, it also
+    /// resets some other communication states. In particular, it resets
+    /// the clock skew.
+    void setPartnerUnavailable();
+
 private:
     /// @brief Sets partner state.
     ///
@@ -544,6 +552,7 @@ public:
     void setPartnerTime(const std::string& time_text);
 
 private:
+
     /// @brief Provide partner's notion of time so the new clock skew can be
     /// calculated.
     ///
@@ -557,6 +566,12 @@ private:
     /// precision.
     void setPartnerTimeInternal(const std::string& time_text);
 
+    /// @brief Resets the partner time and the clock skew to defaults.
+    ///
+    /// This function is called internally when the communication with the
+    /// partner fails.
+    void resetPartnerTimeInternal();
+
 public:
     /// @brief Returns current clock skew value in the logger friendly format.
     std::string logFormatClockSkew() const;
index 75a06094c0b1e31a7e17df0d2685dee93d28a7b4..2521197d9deb9ec43341f296843d55859d6ab60d 100644 (file)
@@ -1443,7 +1443,7 @@ HAService::asyncSendLeaseUpdate(const QueryPtrType& query,
                     if (!lease_update_conflict) {
                         // If we were unable to communicate with the partner we set partner's
                         // state as unavailable.
-                        communication_state_->setPartnerState("unavailable");
+                        communication_state_->setPartnerUnavailable();
                     }
                 } else {
                     // Lease update successful and we may need to clear some previously
@@ -1796,7 +1796,7 @@ HAService::asyncSendHeartbeat() {
             } else {
                 // We were unable to retrieve partner's state, so let's mark it
                 // as unavailable.
-                communication_state_->setPartnerState("unavailable");
+                communication_state_->setPartnerUnavailable();
                 // Log if the communication is interrupted.
                 if (communication_state_->isCommunicationInterrupted()) {
                     LOG_WARN(ha_logger, HA_COMMUNICATION_INTERRUPTED)
@@ -1904,7 +1904,7 @@ HAService::asyncDisableDHCPService(HttpClient& http_client,
              // If there was an error communicating with the partner, mark the
              // partner as unavailable.
              if (!error_message.empty()) {
-                 communication_state_->setPartnerState("unavailable");
+                 communication_state_->setPartnerUnavailable();
              }
 
              // Invoke post request action if it was specified.
@@ -1981,7 +1981,7 @@ HAService::asyncEnableDHCPService(HttpClient& http_client,
              // If there was an error communicating with the partner, mark the
              // partner as unavailable.
              if (!error_message.empty()) {
-                 communication_state_->setPartnerState("unavailable");
+                 communication_state_->setPartnerUnavailable();
              }
 
              // Invoke post request action if it was specified.
@@ -2226,7 +2226,7 @@ HAService::asyncSyncLeasesInternal(http::HttpClient& http_client,
              // If there was an error communicating with the partner, mark the
              // partner as unavailable.
              if (!error_message.empty()) {
-                 communication_state_->setPartnerState("unavailable");
+                 communication_state_->setPartnerUnavailable();
 
              } else if (last_lease) {
                  // This indicates that there are more leases to be fetched.
@@ -2706,7 +2706,7 @@ HAService::processMaintenanceStart() {
              // If there was an error communicating with the partner, mark the
              // partner as unavailable.
              if (!error_message.empty()) {
-                 communication_state_->setPartnerState("unavailable");
+                 communication_state_->setPartnerUnavailable();
              }
 
              captured_ec = ec;
@@ -2820,7 +2820,7 @@ HAService::processMaintenanceCancel() {
              // If there was an error communicating with the partner, mark the
              // partner as unavailable.
              if (!error_message.empty()) {
-                 communication_state_->setPartnerState("unavailable");
+                 communication_state_->setPartnerUnavailable();
              }
         },
         HttpClient::RequestTimeout(TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST),
@@ -2916,7 +2916,7 @@ HAService::asyncSyncCompleteNotify(HttpClient& http_client,
              // If there was an error communicating with the partner, mark the
              // partner as unavailable.
              if (!error_message.empty()) {
-                 communication_state_->setPartnerState("unavailable");
+                 communication_state_->setPartnerUnavailable();
              }
 
              // Invoke post request action if it was specified.
index e9e282b26feb934c22f950e6131dcfad149522ee..501fc0e883dd8d1b4bd98891b86dc6d43e9352ab 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC")
 //
 // This Source Code Form is subject to the terms of the Mozilla Public
 // License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -59,6 +59,12 @@ public:
     /// @brief Verifies that the partner state is set and retrieved correctly.
     void partnerStateTest();
 
+    /// @brief Verifies that the partner state is set to unavailable.
+    ///
+    /// Whether or not the function under test also resets the clock skew is
+    /// tested in a different test case.
+    void partnerStateUnavailableTest();
+
     /// @brief Verifies that the partner's scopes are set and retrieved correctly.
     void partnerScopesTest();
 
@@ -94,6 +100,10 @@ public:
     /// clockSkewShouldWarn and clockSkewShouldTerminate functions.
     void clockSkewTest();
 
+    /// @brief This test verifies that the clock skew calculations take into
+    /// account whether or not the partner is available.
+    void clockSkewPartnerUnavailableTest();
+
     /// @brief This test verifies that the clock skew value is formatted correctly
     /// for logging.
     void logFormatClockSkewTest();
@@ -198,6 +208,20 @@ CommunicationStateTest::partnerStateTest() {
     EXPECT_THROW(state_.setPartnerState("unsupported"), BadValue);
 }
 
+// Verifies that the partner state is set to unavailable.
+void
+CommunicationStateTest::partnerStateUnavailableTest() {
+    // Initially the state is unknown.
+    EXPECT_LT(state_.getPartnerState(), 0);
+
+    // Set a valid state initially.
+    state_.setPartnerState("hot-standby");
+    EXPECT_EQ(HA_HOT_STANDBY_ST, state_.getPartnerState());
+
+    state_.setPartnerUnavailable();
+    EXPECT_EQ(HA_UNAVAILABLE_ST, state_.getPartnerState());
+}
+
 // Verifies that the partner's scopes are set and retrieved correctly.
 void
 CommunicationStateTest::partnerScopesTest() {
@@ -555,6 +579,28 @@ CommunicationStateTest::clockSkewTest() {
     EXPECT_TRUE(state_.clockSkewShouldTerminate());
 }
 
+// This test verifies that the clock skew calculations take into
+// account whether or not the partner is available.
+void
+CommunicationStateTest::clockSkewPartnerUnavailableTest() {
+    // Default clock skew is 0.
+    EXPECT_FALSE(state_.clockSkewShouldWarn());
+    EXPECT_FALSE(state_.clockSkewShouldTerminate());
+
+    // Move the clock skew beyond the 60s limit. The alarms about the
+    // too high clock skew should be activated.
+    state_.setPartnerTime(HttpDateTime().rfc1123Format());
+    state_.clock_skew_ += boost::posix_time::time_duration(0, 1, 5);
+    EXPECT_TRUE(state_.clockSkewShouldWarn());
+    EXPECT_TRUE(state_.clockSkewShouldTerminate());
+
+    // Mark the partner as unavailable. It should reset the clock skew
+    // because we don't know the actual partner's time at the moment.
+    state_.setPartnerUnavailable();
+    EXPECT_FALSE(state_.clockSkewShouldWarn());
+    EXPECT_FALSE(state_.clockSkewShouldTerminate());
+}
+
 // This test verifies that the clock skew value is formatted correctly
 // for logging.
 void
@@ -1022,6 +1068,15 @@ TEST_F(CommunicationStateTest, clockSkewTestMultiThreading) {
     clockSkewTest();
 }
 
+TEST_F(CommunicationStateTest, clockSkewPartnerUnavailableTest) {
+    clockSkewPartnerUnavailableTest();
+}
+
+TEST_F(CommunicationStateTest, clockSkewPartnerUnavailableTestMultiThreading) {
+    MultiThreadingMgr::instance().setMode(true);
+    clockSkewPartnerUnavailableTest();
+}
+
 TEST_F(CommunicationStateTest, rejectedLeaseUpdatesTerminateTest) {
     rejectedLeaseUpdatesTerminateTest();
 }
index 6eb06c87f42281d93ad12dd19a7561c15f8a032f..d017906b4c37464d0489a3ba87b8caa953ab309c 100644 (file)
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC")
 //
 // This Source Code Form is subject to the terms of the Mozilla Public
 // License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -8003,4 +8003,35 @@ TEST_F(HAServiceStateMachineTest, clearRejectedLeaseUpdatesPassiveBackup) {
     EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_PASSIVE_BACKUP_ST)));
 }
 
+// Verifies that the server doesn't terminate when the partner is unavailable.
+TEST_F(HAServiceStateMachineTest, doNotTerminateWhenPartnerUnavailable) {
+    HAConfigPtr config_storage = createValidConfiguration();
+    ASSERT_NO_THROW_LOG(service_.reset(new TestHAService(io_service_, network_state_,
+                                                         config_storage)));
+    NakedCommunicationState4Ptr state(new NakedCommunicationState4(io_service_,
+                                                                   config_storage));
+    service_->communication_state_ = state;
+
+    // Begin in the load-balancing state and the partner is also in
+    // that state.
+    service_->verboseTransition(HA_LOAD_BALANCING_ST);
+    service_->runModel(HAService::NOP_EVT);
+    service_->communication_state_->poke();
+    service_->communication_state_->setPartnerState("load-balancing");
+
+    // Change the clock skew, so it is beyond the acceptable threshold.
+    state->clock_skew_ += boost::posix_time::time_duration(0, 1, 5);
+
+    // Send the heartbeat. We didn't setup the connection to the partner.
+    // The heartbeat should fail and our server should transition to the
+    // communication-recovery state. Earlier, the server had a bug that
+    // it would take the last known clock skew and transition to the
+    // terminated state instead. It was wrong because when the partner is
+    // unavailable we can't tell the current clock skew. The administrator
+    // may actually be fixing the time on the other server. Thus, we must
+    // not terminate.
+    ASSERT_NO_FATAL_FAILURE(waitForEvent(HAService::HA_HEARTBEAT_COMPLETE_EVT));
+    EXPECT_EQ(HA_COMMUNICATION_RECOVERY_ST, service_->getCurrState());
+}
+
 } // end of anonymous namespace