From: Thomas Markwalder Date: Mon, 8 Jul 2019 17:26:58 +0000 (-0400) Subject: [#174,!414] HA now logs server times along with skew X-Git-Tag: Kea-1.6.1~10^2~114 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dabd418852c88e14d82bd1d6aa9c10f9beb19f16;p=thirdparty%2Fkea.git [#174,!414] HA now logs server times along with skew src/hooks/dhcp/high_availability/communication_state.* ComunicationState::setPartnerTime() - in addition to the skew it now retains current time and partner time CommunicationState::logFormatClockSkew() - now outputs current time and partner time, along with skew value src/hooks/dhcp/high_availability/ha_messages.* HA_HIGH_CLOCK_SKEW HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION - modified to accomdate peer times HA_TERMINATED - reworded, as entering this state is a generic endpoint that may, in future be reached for reasons other than clock skew src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc TEST_F(CommunicationStateTest, logFormatClockSkew) - updated --- diff --git a/ChangeLog b/ChangeLog index 9abc64a5fe..8fd5012c81 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +1627. [func] tmark + High Availaiblity logging now also emits server and partner system + times when reporting clock skew issues. Prior to this it reported + only the skew between the two servers. + (Gitlab #174,!414, git TBD) + 1626. [bug] marcin Automatically delete embedded options as a result of deleting a subnet, pool or shared network from the MySQL Configuration diff --git a/src/hooks/dhcp/high_availability/communication_state.cc b/src/hooks/dhcp/high_availability/communication_state.cc index 429f230ee8..2a159c4770 100644 --- a/src/hooks/dhcp/high_availability/communication_state.cc +++ b/src/hooks/dhcp/high_availability/communication_state.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2018 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,7 @@ CommunicationState::CommunicationState(const IOServicePtr& io_service, : io_service_(io_service), config_(config), timer_(), interval_(0), poke_time_(boost::posix_time::microsec_clock::universal_time()), heartbeat_impl_(0), partner_state_(-1), clock_skew_(0, 0, 0, 0), - last_clock_skew_warn_() { + last_clock_skew_warn_(), my_time_at_skew_(), partner_time_at_skew_() { } CommunicationState::~CommunicationState() { @@ -216,26 +217,36 @@ CommunicationState::isClockSkewGreater(const long seconds) const { void CommunicationState::setPartnerTime(const std::string& time_text) { - HttpDateTime partner_time = HttpDateTime().fromRfc1123(time_text); - HttpDateTime current_time = HttpDateTime(); + partner_time_at_skew_ = HttpDateTime().fromRfc1123(time_text).getPtime(); + my_time_at_skew_ = HttpDateTime().getPtime(); - clock_skew_ = partner_time.getPtime() - current_time.getPtime(); + clock_skew_ = partner_time_at_skew_ - my_time_at_skew_; } std::string CommunicationState::logFormatClockSkew() const { - std::ostringstream s; + std::ostringstream os; + + if ((my_time_at_skew_.is_not_a_date_time()) || + (partner_time_at_skew_.is_not_a_date_time())) { + // Guard against being called before times have been set. + // Otherwise we'll get out-range exceptions. + return ("skew not initialized"); + } + + os << "my time: " << util::ptimeToText(my_time_at_skew_) + << ", partner's time: " << util::ptimeToText(partner_time_at_skew_) + << ", partner's clock is "; // If negative clock skew, the partner's time is behind our time. if (clock_skew_.is_negative()) { - s << clock_skew_.invert_sign().total_seconds() << "s behind"; - + os << clock_skew_.invert_sign().total_seconds() << "s behind"; } else { // Partner's time is ahead of ours. - s << clock_skew_.total_seconds() << "s ahead"; + os << clock_skew_.total_seconds() << "s ahead"; } - return (s.str()); + return (os.str()); } CommunicationState4::CommunicationState4(const IOServicePtr& io_service, diff --git a/src/hooks/dhcp/high_availability/communication_state.h b/src/hooks/dhcp/high_availability/communication_state.h index fb12517546..1bc80e889b 100644 --- a/src/hooks/dhcp/high_availability/communication_state.h +++ b/src/hooks/dhcp/high_availability/communication_state.h @@ -1,4 +1,4 @@ -// Copyright (C) 2018 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -276,7 +276,9 @@ public: /// calculated. /// /// @param time_text Partner's time received in response to a heartbeat. The - /// time must be provided in the RFC 1123 format. + /// time must be provided in the RFC 1123 format. It stores the current + /// time, partner's time, and the difference (skew) between them. + /// /// @throw isc::http::HttpTimeConversionError if the time format is invalid. /// /// @todo Consider some other time formats which include millisecond @@ -317,6 +319,12 @@ protected: /// @brief Holds a time when last warning about too high clock skew /// was issued. boost::posix_time::ptime last_clock_skew_warn_; + + /// @brief My time when skew was calculated. + boost::posix_time::ptime my_time_at_skew_; + + /// @brief Partner reported time when skew was calculated. + boost::posix_time::ptime partner_time_at_skew_; }; /// @brief Type of the pointer to the @c CommunicationState object. diff --git a/src/hooks/dhcp/high_availability/ha_messages.cc b/src/hooks/dhcp/high_availability/ha_messages.cc index 708db8faca..7baeab48ec 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.cc +++ b/src/hooks/dhcp/high_availability/ha_messages.cc @@ -1,4 +1,4 @@ -// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Wed Jun 26 2019 15:13 +// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon Jul 08 2019 13:20 #include #include @@ -104,8 +104,8 @@ const char* values[] = { "HA_HEARTBEAT_COMMUNICATIONS_FAILED", "failed to send heartbeat to %1: %2", "HA_HEARTBEAT_FAILED", "heartbeat to %1 failed: %2", "HA_HEARTBEAT_HANDLER_FAILED", "heartbeat command failed: %1", - "HA_HIGH_CLOCK_SKEW", "partner's clock is %1, please synchronize clocks!", - "HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION", "partner's clock is %1, causing HA service to terminate", + "HA_HIGH_CLOCK_SKEW", "%1, please synchronize clocks!", + "HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION", "%1, causing HA service to terminate", "HA_INIT_OK", "loading High Availability hooks library successful", "HA_LEASES4_COMMITTED_FAILED", "leases4_committed callout failed: %1", "HA_LEASES4_COMMITTED_NOTHING_TO_UPDATE", "%1: leases4_committed callout was invoked without any leases", @@ -137,7 +137,7 @@ const char* values[] = { "HA_SYNC_HANDLER_FAILED", "ha-sync command failed: %1", "HA_SYNC_START", "starting lease database synchronization with %1", "HA_SYNC_SUCCESSFUL", "lease database synchronization with %1 completed successfully in %2", - "HA_TERMINATED", "HA service terminated because of the unacceptable clock skew; fix the problem and restart!", + "HA_TERMINATED", "HA service terminated due to an unrecoverable condition. Check previous error message(s), address the problem and restart!", NULL }; diff --git a/src/hooks/dhcp/high_availability/ha_messages.h b/src/hooks/dhcp/high_availability/ha_messages.h index bbe359fe03..4fc5168f3f 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.h +++ b/src/hooks/dhcp/high_availability/ha_messages.h @@ -1,4 +1,4 @@ -// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Wed Jun 26 2019 15:13 +// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon Jul 08 2019 13:20 #ifndef HA_MESSAGES_H #define HA_MESSAGES_H diff --git a/src/hooks/dhcp/high_availability/ha_messages.mes b/src/hooks/dhcp/high_availability/ha_messages.mes index ef39c02f6a..0fc4d703a8 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.mes +++ b/src/hooks/dhcp/high_availability/ha_messages.mes @@ -163,14 +163,14 @@ This error message is issued to indicate that the heartbeat command handler failed while processing the command. The argument provides the reason for failure. -% HA_HIGH_CLOCK_SKEW partner's clock is %1, please synchronize clocks! +% HA_HIGH_CLOCK_SKEW %1, please synchronize clocks! This warning message is issued when the clock skew between the active servers exceeds 30 seconds. The HA service continues to operate but may not function properly, especially for low lease lifetimes. The administrator should should synchronize the clocks, e.g. using NTP. If the clock skew exceeds 60 seconds, the HA service will terminate. -% HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION partner's clock is %1, causing HA service to terminate +% HA_HIGH_CLOCK_SKEW_CAUSES_TERMINATION %1, causing HA service to terminate This warning message is issued when the clock skew between the active servers exceeds 60 seconds. The HA service stops. The servers will continue to respond to the DHCP queries but won't exchange lease updates or send heartbeats. @@ -360,7 +360,8 @@ lease database synchronization with the partner. The first argument specifies the name of the partner server. The second argument specifies the duration of the synchronization. -% HA_TERMINATED HA service terminated because of the unacceptable clock skew; fix the problem and restart! +% HA_TERMINATED HA service terminated due to an unrecoverable condition. Check previous error message(s), address the problem and restart! This error message is issued to indicate that the HA service has been stopped -due to unacceptable clock skew. The error can be fixed by synchronizing the -clocks on the active servers and restarting Kea. +due to an unacceptable condition (e.g. too large of a clock skew). The exact +cause should appear in a previous error message. Address the condition +reported then restart the servers to resume service. diff --git a/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc b/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc index 423e005bdc..7141b8c07f 100644 --- a/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc +++ b/src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2018 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2018-2019 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -368,14 +368,29 @@ TEST_F(CommunicationStateTest, clockSkew) { // This test verifies that the clock skew value is formatted correctly // for logging. TEST_F(CommunicationStateTest, logFormatClockSkew) { - // Partner time is ahead by 15s. - state_.clock_skew_ += boost::posix_time::time_duration(0, 0, 15); - EXPECT_EQ("15s ahead", state_.logFormatClockSkew()); + // Make sure logFormatClockSkew() does not throw if called prior + // the first call to setPartnerTime(). + std::string log; + ASSERT_NO_THROW(log = state_.logFormatClockSkew()); + EXPECT_EQ(std::string("skew not initialized"), log); - // Partner time is behind by 1m23s. - state_.setPartnerTime(HttpDateTime().rfc1123Format()); - state_.clock_skew_ -= boost::posix_time::time_duration(0, 1, 23); - EXPECT_EQ("83s behind", state_.logFormatClockSkew()); + // Get current time. + boost::posix_time::ptime now = HttpDateTime().getPtime(); + + // Partner time is ahead by 15s. + boost::posix_time::time_duration offset(0,0,15,0); + state_.setPartnerTime(HttpDateTime(now + offset).rfc1123Format()); + ASSERT_NO_THROW(log = state_.logFormatClockSkew()); + // We don't check the exact string for obvious reasons. + EXPECT_TRUE(log.find("s ahead") != std::string::npos) << + " log content wrong: " << log; + + // Partner time is behind by 15s. + state_.setPartnerTime(HttpDateTime(now - offset).rfc1123Format()); + ASSERT_NO_THROW(log = state_.logFormatClockSkew()); + // We don't check the exact string for obvious reasons. + EXPECT_TRUE(log.find("s behind") != std::string::npos) << + " log content wrong: " << log; } }