From: Marcin Siodelski Date: Tue, 28 Mar 2023 09:31:13 +0000 (+0200) Subject: [#2754] Fixed clock skew for partner unavailable X-Git-Tag: Kea-2.3.7~52 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5055d84d1a674c59bdfcb6b022feb750039b05d7;p=thirdparty%2Fkea.git [#2754] Fixed clock skew for partner unavailable The clock skew is now reset when the partner is unavailable. We can't reliably tell what the clock skew is when we have no communication with the partner. --- diff --git a/src/hooks/dhcp/high_availability/communication_state.cc b/src/hooks/dhcp/high_availability/communication_state.cc index cd1a5f5449..9514a0a240 100644 --- a/src/hooks/dhcp/high_availability/communication_state.cc +++ b/src/hooks/dhcp/high_availability/communication_state.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -99,6 +99,18 @@ CommunicationState::setPartnerState(const std::string& state) { } } +void +CommunicationState::setPartnerUnavailable() { + if (MultiThreadingMgr::instance().getMode()) { + std::lock_guard lk(*mutex_); + setPartnerStateInternal("unavailable"); + resetPartnerTimeInternal(); + } else { + setPartnerStateInternal("unavailable"); + resetPartnerTimeInternal(); + } +} + void CommunicationState::setPartnerStateInternal(const std::string& state) { try { @@ -469,6 +481,14 @@ CommunicationState::setPartnerTimeInternal(const std::string& time_text) { clock_skew_ = partner_time_at_skew_ - my_time_at_skew_; } +void +CommunicationState::resetPartnerTimeInternal() { + clock_skew_ = boost::posix_time::time_duration(0, 0, 0, 0); + last_clock_skew_warn_ = boost::posix_time::ptime(); + my_time_at_skew_ = boost::posix_time::ptime(); + partner_time_at_skew_ = boost::posix_time::ptime(); +} + std::string CommunicationState::logFormatClockSkew() const { if (MultiThreadingMgr::instance().getMode()) { diff --git a/src/hooks/dhcp/high_availability/communication_state.h b/src/hooks/dhcp/high_availability/communication_state.h index a937025748..2ca01db97d 100644 --- a/src/hooks/dhcp/high_availability/communication_state.h +++ b/src/hooks/dhcp/high_availability/communication_state.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -106,6 +106,14 @@ public: /// @throw BadValue if unsupported state value was provided. void setPartnerState(const std::string& state); + /// @brief Sets partner state unavailable. + /// + /// This function should be called whenever there is a problem to + /// communicate with the partner. Besides setting the state, it also + /// resets some other communication states. In particular, it resets + /// the clock skew. + void setPartnerUnavailable(); + private: /// @brief Sets partner state. /// @@ -544,6 +552,7 @@ public: void setPartnerTime(const std::string& time_text); private: + /// @brief Provide partner's notion of time so the new clock skew can be /// calculated. /// @@ -557,6 +566,12 @@ private: /// precision. void setPartnerTimeInternal(const std::string& time_text); + /// @brief Resets the partner time and the clock skew to defaults. + /// + /// This function is called internally when the communication with the + /// partner fails. + void resetPartnerTimeInternal(); + public: /// @brief Returns current clock skew value in the logger friendly format. std::string logFormatClockSkew() const; diff --git a/src/hooks/dhcp/high_availability/ha_service.cc b/src/hooks/dhcp/high_availability/ha_service.cc index 75a06094c0..2521197d9d 100644 --- a/src/hooks/dhcp/high_availability/ha_service.cc +++ b/src/hooks/dhcp/high_availability/ha_service.cc @@ -1443,7 +1443,7 @@ HAService::asyncSendLeaseUpdate(const QueryPtrType& query, if (!lease_update_conflict) { // If we were unable to communicate with the partner we set partner's // state as unavailable. - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); } } else { // Lease update successful and we may need to clear some previously @@ -1796,7 +1796,7 @@ HAService::asyncSendHeartbeat() { } else { // We were unable to retrieve partner's state, so let's mark it // as unavailable. - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); // Log if the communication is interrupted. if (communication_state_->isCommunicationInterrupted()) { LOG_WARN(ha_logger, HA_COMMUNICATION_INTERRUPTED) @@ -1904,7 +1904,7 @@ HAService::asyncDisableDHCPService(HttpClient& http_client, // If there was an error communicating with the partner, mark the // partner as unavailable. if (!error_message.empty()) { - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); } // Invoke post request action if it was specified. @@ -1981,7 +1981,7 @@ HAService::asyncEnableDHCPService(HttpClient& http_client, // If there was an error communicating with the partner, mark the // partner as unavailable. if (!error_message.empty()) { - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); } // Invoke post request action if it was specified. @@ -2226,7 +2226,7 @@ HAService::asyncSyncLeasesInternal(http::HttpClient& http_client, // If there was an error communicating with the partner, mark the // partner as unavailable. if (!error_message.empty()) { - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); } else if (last_lease) { // This indicates that there are more leases to be fetched. @@ -2706,7 +2706,7 @@ HAService::processMaintenanceStart() { // If there was an error communicating with the partner, mark the // partner as unavailable. if (!error_message.empty()) { - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); } captured_ec = ec; @@ -2820,7 +2820,7 @@ HAService::processMaintenanceCancel() { // If there was an error communicating with the partner, mark the // partner as unavailable. if (!error_message.empty()) { - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); } }, HttpClient::RequestTimeout(TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST), @@ -2916,7 +2916,7 @@ HAService::asyncSyncCompleteNotify(HttpClient& http_client, // If there was an error communicating with the partner, mark the // partner as unavailable. if (!error_message.empty()) { - communication_state_->setPartnerState("unavailable"); + communication_state_->setPartnerUnavailable(); } // Invoke post request action if it was specified. diff --git a/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc b/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc index e9e282b26f..501fc0e883 100644 --- a/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc +++ b/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -59,6 +59,12 @@ public: /// @brief Verifies that the partner state is set and retrieved correctly. void partnerStateTest(); + /// @brief Verifies that the partner state is set to unavailable. + /// + /// Whether or not the function under test also resets the clock skew is + /// tested in a different test case. + void partnerStateUnavailableTest(); + /// @brief Verifies that the partner's scopes are set and retrieved correctly. void partnerScopesTest(); @@ -94,6 +100,10 @@ public: /// clockSkewShouldWarn and clockSkewShouldTerminate functions. void clockSkewTest(); + /// @brief This test verifies that the clock skew calculations take into + /// account whether or not the partner is available. + void clockSkewPartnerUnavailableTest(); + /// @brief This test verifies that the clock skew value is formatted correctly /// for logging. void logFormatClockSkewTest(); @@ -198,6 +208,20 @@ CommunicationStateTest::partnerStateTest() { EXPECT_THROW(state_.setPartnerState("unsupported"), BadValue); } +// Verifies that the partner state is set to unavailable. +void +CommunicationStateTest::partnerStateUnavailableTest() { + // Initially the state is unknown. + EXPECT_LT(state_.getPartnerState(), 0); + + // Set a valid state initially. + state_.setPartnerState("hot-standby"); + EXPECT_EQ(HA_HOT_STANDBY_ST, state_.getPartnerState()); + + state_.setPartnerUnavailable(); + EXPECT_EQ(HA_UNAVAILABLE_ST, state_.getPartnerState()); +} + // Verifies that the partner's scopes are set and retrieved correctly. void CommunicationStateTest::partnerScopesTest() { @@ -555,6 +579,28 @@ CommunicationStateTest::clockSkewTest() { EXPECT_TRUE(state_.clockSkewShouldTerminate()); } +// This test verifies that the clock skew calculations take into +// account whether or not the partner is available. +void +CommunicationStateTest::clockSkewPartnerUnavailableTest() { + // Default clock skew is 0. + EXPECT_FALSE(state_.clockSkewShouldWarn()); + EXPECT_FALSE(state_.clockSkewShouldTerminate()); + + // Move the clock skew beyond the 60s limit. The alarms about the + // too high clock skew should be activated. + state_.setPartnerTime(HttpDateTime().rfc1123Format()); + state_.clock_skew_ += boost::posix_time::time_duration(0, 1, 5); + EXPECT_TRUE(state_.clockSkewShouldWarn()); + EXPECT_TRUE(state_.clockSkewShouldTerminate()); + + // Mark the partner as unavailable. It should reset the clock skew + // because we don't know the actual partner's time at the moment. + state_.setPartnerUnavailable(); + EXPECT_FALSE(state_.clockSkewShouldWarn()); + EXPECT_FALSE(state_.clockSkewShouldTerminate()); +} + // This test verifies that the clock skew value is formatted correctly // for logging. void @@ -1022,6 +1068,15 @@ TEST_F(CommunicationStateTest, clockSkewTestMultiThreading) { clockSkewTest(); } +TEST_F(CommunicationStateTest, clockSkewPartnerUnavailableTest) { + clockSkewPartnerUnavailableTest(); +} + +TEST_F(CommunicationStateTest, clockSkewPartnerUnavailableTestMultiThreading) { + MultiThreadingMgr::instance().setMode(true); + clockSkewPartnerUnavailableTest(); +} + TEST_F(CommunicationStateTest, rejectedLeaseUpdatesTerminateTest) { rejectedLeaseUpdatesTerminateTest(); } diff --git a/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc b/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc index 6eb06c87f4..d017906b4c 100644 --- a/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc +++ b/src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2022 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2023 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -8003,4 +8003,35 @@ TEST_F(HAServiceStateMachineTest, clearRejectedLeaseUpdatesPassiveBackup) { EXPECT_FALSE(expectClearRejectedLeases(MyState(HA_PASSIVE_BACKUP_ST))); } +// Verifies that the server doesn't terminate when the partner is unavailable. +TEST_F(HAServiceStateMachineTest, doNotTerminateWhenPartnerUnavailable) { + HAConfigPtr config_storage = createValidConfiguration(); + ASSERT_NO_THROW_LOG(service_.reset(new TestHAService(io_service_, network_state_, + config_storage))); + NakedCommunicationState4Ptr state(new NakedCommunicationState4(io_service_, + config_storage)); + service_->communication_state_ = state; + + // Begin in the load-balancing state and the partner is also in + // that state. + service_->verboseTransition(HA_LOAD_BALANCING_ST); + service_->runModel(HAService::NOP_EVT); + service_->communication_state_->poke(); + service_->communication_state_->setPartnerState("load-balancing"); + + // Change the clock skew, so it is beyond the acceptable threshold. + state->clock_skew_ += boost::posix_time::time_duration(0, 1, 5); + + // Send the heartbeat. We didn't setup the connection to the partner. + // The heartbeat should fail and our server should transition to the + // communication-recovery state. Earlier, the server had a bug that + // it would take the last known clock skew and transition to the + // terminated state instead. It was wrong because when the partner is + // unavailable we can't tell the current clock skew. The administrator + // may actually be fixing the time on the other server. Thus, we must + // not terminate. + ASSERT_NO_FATAL_FAILURE(waitForEvent(HAService::HA_HEARTBEAT_COMPLETE_EVT)); + EXPECT_EQ(HA_COMMUNICATION_RECOVERY_ST, service_->getCurrState()); +} + } // end of anonymous namespace