From: Marcin Siodelski Date: Mon, 18 May 2020 09:58:42 +0000 (+0200) Subject: [#1087] HA logs in communication interrupted X-Git-Tag: Kea-1.7.8~43 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8f7d8a4daa0e908eda6a388f969a6efd002a152a;p=thirdparty%2Fkea.git [#1087] HA logs in communication interrupted There are new log messages emitted in the communication interrupted state to indicate whether the server sees the traffic going to the partner. --- diff --git a/src/hooks/dhcp/high_availability/communication_state.cc b/src/hooks/dhcp/high_availability/communication_state.cc index 8500d4663e..10c5bc9dac 100644 --- a/src/hooks/dhcp/high_availability/communication_state.cc +++ b/src/hooks/dhcp/high_availability/communication_state.cc @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -27,6 +28,7 @@ using namespace isc::asiolink; using namespace isc::data; using namespace isc::dhcp; using namespace isc::http; +using namespace isc::log; using namespace boost::posix_time; namespace { @@ -348,6 +350,8 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr& message) client_id = opt_client_id->getData(); } + bool log_unacked = false; + // Check if the given client was already recorded. auto& idx = connecting_clients_.get<0>(); auto existing_request = idx.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id)); @@ -359,14 +363,35 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr& message) if (!existing_request->unacked_ && unacked) { ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked }; idx.replace(existing_request, connecting_client); + log_unacked = true; + } + + } else { + // This is the first time we see the packet from this client. Let's + // record it. + ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked }; + idx.insert(connecting_client); + log_unacked = unacked; + + if (!unacked) { + // This is the first time we see this client after getting into the + // communication interrupted state. But, this client hasn't been + // yet trying log enough to be considered unacked. + LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT4) + .arg(message->getLabel()); } - return; } - // This is the first time we see the packet from this client. Let's - // record it. - ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked }; - idx.insert(connecting_client); + if (log_unacked) { + unsigned unacked_left = 0; + if (config_->getMaxUnackedClients() > getUnackedClientsCount()) { + unacked_left = config_->getMaxUnackedClients() > getUnackedClientsCount(); + } + LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED) + .arg(message->getLabel()) + .arg(getUnackedClientsCount()) + .arg(unacked_left); + } } bool @@ -418,6 +443,8 @@ CommunicationState6::analyzeMessage(const boost::shared_ptr& message) return; } + bool log_unacked = false; + // Check if the given client was already recorded. auto& idx = connecting_clients_.get<0>(); auto existing_request = idx.find(duid->getData()); @@ -429,13 +456,35 @@ CommunicationState6::analyzeMessage(const boost::shared_ptr& message) if (!existing_request->unacked_ && unacked) { ConnectingClient6 connecting_client{ duid->getData(), unacked }; idx.replace(existing_request, connecting_client); + log_unacked = true; + } + + } else { + // This is the first time we see the packet from this client. Let's + // record it. + ConnectingClient6 connecting_client{ duid->getData(), unacked }; + idx.insert(connecting_client); + log_unacked = unacked; + + if (!unacked) { + // This is the first time we see this client after getting into the + // communication interrupted state. But, this client hasn't been + // yet trying log enough to be considered unacked. + LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT6) + .arg(message->getLabel()); } } - // This is the first time we see the packet from this client. Let's - // record it. - ConnectingClient6 connecting_client{ duid->getData(), unacked }; - idx.insert(connecting_client); + if (log_unacked) { + unsigned unacked_left = 0; + if (config_->getMaxUnackedClients() > getUnackedClientsCount()) { + unacked_left = config_->getMaxUnackedClients() > getUnackedClientsCount(); + } + LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED) + .arg(message->getLabel()) + .arg(getUnackedClientsCount()) + .arg(unacked_left); + } } bool diff --git a/src/hooks/dhcp/high_availability/ha_messages.cc b/src/hooks/dhcp/high_availability/ha_messages.cc index 5beb397150..3167aaf4a7 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.cc +++ b/src/hooks/dhcp/high_availability/ha_messages.cc @@ -1,4 +1,4 @@ -// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Thu Apr 30 2020 12:21 +// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon May 18 2020 11:51 #include #include @@ -16,6 +16,11 @@ extern const isc::log::MessageID HA_BUFFER6_RECEIVE_NOT_FOR_US = "HA_BUFFER6_REC extern const isc::log::MessageID HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED = "HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED"; extern const isc::log::MessageID HA_BUFFER6_RECEIVE_UNPACK_FAILED = "HA_BUFFER6_RECEIVE_UNPACK_FAILED"; extern const isc::log::MessageID HA_COMMAND_PROCESSED_FAILED = "HA_COMMAND_PROCESSED_FAILED"; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED = "HA_COMMUNICATION_INTERRUPTED"; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4 = "HA_COMMUNICATION_INTERRUPTED_CLIENT4"; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED = "HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED"; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6 = "HA_COMMUNICATION_INTERRUPTED_CLIENT6"; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED = "HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED"; extern const isc::log::MessageID HA_CONFIGURATION_FAILED = "HA_CONFIGURATION_FAILED"; extern const isc::log::MessageID HA_CONFIGURATION_SUCCESSFUL = "HA_CONFIGURATION_SUCCESSFUL"; extern const isc::log::MessageID HA_CONFIG_AUTO_FAILOVER_DISABLED = "HA_CONFIG_AUTO_FAILOVER_DISABLED"; @@ -96,6 +101,11 @@ const char* values[] = { "HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED", "an error upacking an option, caused subsequent options to be skipped: %1", "HA_BUFFER6_RECEIVE_UNPACK_FAILED", "failed to parse query from %1 to %2, received over interface %3, reason: %4", "HA_COMMAND_PROCESSED_FAILED", "command_processed callout failed: %1", + "HA_COMMUNICATION_INTERRUPTED", "communication is interrupted and failover process has been started", + "HA_COMMUNICATION_INTERRUPTED_CLIENT4", "%1: new client attempting to get a lease from the partner", + "HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED", "%1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state", + "HA_COMMUNICATION_INTERRUPTED_CLIENT6", "%1: new client attempting to get a lease from the partner", + "HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED", "%1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state", "HA_CONFIGURATION_FAILED", "failed to configure High Availability hooks library: %1", "HA_CONFIGURATION_SUCCESSFUL", "HA hook library has been successfully configured", "HA_CONFIG_AUTO_FAILOVER_DISABLED", "auto-failover disabled for %1", diff --git a/src/hooks/dhcp/high_availability/ha_messages.h b/src/hooks/dhcp/high_availability/ha_messages.h index 89528421c5..ca6769b8d3 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.h +++ b/src/hooks/dhcp/high_availability/ha_messages.h @@ -1,4 +1,4 @@ -// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Thu Apr 30 2020 12:21 +// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon May 18 2020 11:51 #ifndef HA_MESSAGES_H #define HA_MESSAGES_H @@ -17,6 +17,11 @@ extern const isc::log::MessageID HA_BUFFER6_RECEIVE_NOT_FOR_US; extern const isc::log::MessageID HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED; extern const isc::log::MessageID HA_BUFFER6_RECEIVE_UNPACK_FAILED; extern const isc::log::MessageID HA_COMMAND_PROCESSED_FAILED; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6; +extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED; extern const isc::log::MessageID HA_CONFIGURATION_FAILED; extern const isc::log::MessageID HA_CONFIGURATION_SUCCESSFUL; extern const isc::log::MessageID HA_CONFIG_AUTO_FAILOVER_DISABLED; diff --git a/src/hooks/dhcp/high_availability/ha_messages.mes b/src/hooks/dhcp/high_availability/ha_messages.mes index 951ae23b1e..4c8f757fb5 100644 --- a/src/hooks/dhcp/high_availability/ha_messages.mes +++ b/src/hooks/dhcp/high_availability/ha_messages.mes @@ -58,6 +58,53 @@ reason for failure. This error message is issued when the callout for the command_processed hook point failed. The argument contains a reason for the error. +% HA_COMMUNICATION_INTERRUPTED communication is interrupted and failover process has been started +This warning message is issued by the server which discovered that the +communication to the active partner has been interrupted for a time +period longer than the configured heartbeat-delay time. At this stage +the server starts the failover procedure by monitoring the DHCP traffic +sent to the partner and checking whether the partner server responds to +this traffic. If the max-unacked-clients value is set to 0 such +verification is disabled in which case the server will transition to +the partner-down state. + +% HA_COMMUNICATION_INTERRUPTED_CLIENT4 %1: new client attempting to get a lease from the partner +This informational message is issued when the surviving server observes +a DHCP packet sent to the partner with which the commuication is interrupted. +The client which packet is observed is not yet considered "unacked" because +the secs field value does not exceed the configured threshold specified +with max-ack-delay. + +% HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED %1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state +This informational message is issued when the surviving server determines +that its partner failed to respond to the DHCP query and that this client +is considered to not be served by the partner. The surviving server counts +such clients and if the number of such clients exceeds the max-unacked-clients +threshold, the server will transition to the partner-down state. The first +argument contains client identification information. The second argument +specifies the number of clients which the server failed to respond to. The +third argument specifies the number of additional clients which, if not +provisioned, will cause the server to transition to the partner-down state. + +% HA_COMMUNICATION_INTERRUPTED_CLIENT6 %1: new client attempting to get a lease from the partner +This informational message is issued when the surviving server observes +a DHCP packet sent to the partner with which the commuication is interrupted. +The client which packet is observed is not yet considered "unacked" because +the elapsed time option value does not exceed the configured threshold +specified with max-ack-delay. The sole argument specifies client +identification information. + +% HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED %1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state +This informational message is issued when the surviving server determines +that its partner failed to respond to the DHCP query and that this client +is considered to not be served by the partner. The surviving server counts +such clients and if the number of such clients exceeds the max-unacked-clients +threshold, the server will transition to the partner-down state. The first +argument contains client identification information. The second argument +specifies the number of clients which the server failed to respond to. The +third argument specifies the number of additional clients which, if not +provisioned, will cause the server to transition to the partner-down state. + % HA_CONFIGURATION_FAILED failed to configure High Availability hooks library: %1 This error message is issued when there is an error configuring the HA hooks library. The argument provides the detailed error message. diff --git a/src/hooks/dhcp/high_availability/ha_service.cc b/src/hooks/dhcp/high_availability/ha_service.cc index e907b5320d..a5ae0a93e8 100644 --- a/src/hooks/dhcp/high_availability/ha_service.cc +++ b/src/hooks/dhcp/high_availability/ha_service.cc @@ -1349,6 +1349,10 @@ HAService::asyncSendHeartbeat() { // We were unable to retrieve partner's state, so let's mark it // as unavailable. communication_state_->setPartnerState("unavailable"); + // Log if the communication is interrupted. + if (communication_state_->isCommunicationInterrupted()) { + LOG_WARN(ha_logger, HA_COMMUNICATION_INTERRUPTED); + } } // Whatever the result of the heartbeat was, the state machine needs