]> git.ipfire.org Git - thirdparty/kea.git/commitdiff
[#1087] HA logs in communication interrupted
authorMarcin Siodelski <marcin@isc.org>
Mon, 18 May 2020 09:58:42 +0000 (11:58 +0200)
committerMarcin Siodelski <marcin@isc.org>
Wed, 20 May 2020 15:48:16 +0000 (17:48 +0200)
There are new log messages emitted in the communication interrupted state
to indicate whether the server sees the traffic going to the partner.

src/hooks/dhcp/high_availability/communication_state.cc
src/hooks/dhcp/high_availability/ha_messages.cc
src/hooks/dhcp/high_availability/ha_messages.h
src/hooks/dhcp/high_availability/ha_messages.mes
src/hooks/dhcp/high_availability/ha_service.cc

index 8500d4663e93a7153d2841b46157b4944d4fe328..10c5bc9dac81656f84174e31df3ac182aca3034b 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <communication_state.h>
 #include <exceptions/exceptions.h>
+#include <ha_log.h>
 #include <ha_service_states.h>
 #include <cc/data.h>
 #include <exceptions/exceptions.h>
@@ -27,6 +28,7 @@ using namespace isc::asiolink;
 using namespace isc::data;
 using namespace isc::dhcp;
 using namespace isc::http;
+using namespace isc::log;
 using namespace boost::posix_time;
 
 namespace {
@@ -348,6 +350,8 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         client_id = opt_client_id->getData();
     }
 
+    bool log_unacked = false;
+
     // Check if the given client was already recorded.
     auto& idx = connecting_clients_.get<0>();
     auto existing_request = idx.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
@@ -359,14 +363,35 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         if (!existing_request->unacked_ && unacked) {
             ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
             idx.replace(existing_request, connecting_client);
+            log_unacked = true;
+        }
+
+    } else {
+        // This is the first time we see the packet from this client. Let's
+        // record it.
+        ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
+        idx.insert(connecting_client);
+        log_unacked = unacked;
+
+        if (!unacked) {
+            // This is the first time we see this client after getting into the
+            // communication interrupted state. But, this client hasn't been
+            // yet trying log enough to be considered unacked.
+            LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT4)
+                .arg(message->getLabel());
         }
-        return;
     }
 
-    // This is the first time we see the packet from this client. Let's
-    // record it.
-    ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
-    idx.insert(connecting_client);
+    if (log_unacked) {
+        unsigned unacked_left = 0;
+        if (config_->getMaxUnackedClients() > getUnackedClientsCount()) {
+            unacked_left = config_->getMaxUnackedClients() > getUnackedClientsCount();
+        }
+        LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED)
+            .arg(message->getLabel())
+            .arg(getUnackedClientsCount())
+            .arg(unacked_left);
+    }
 }
 
 bool
@@ -418,6 +443,8 @@ CommunicationState6::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         return;
     }
 
+    bool log_unacked = false;
+
     // Check if the given client was already recorded.
     auto& idx = connecting_clients_.get<0>();
     auto existing_request = idx.find(duid->getData());
@@ -429,13 +456,35 @@ CommunicationState6::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         if (!existing_request->unacked_ && unacked) {
             ConnectingClient6 connecting_client{ duid->getData(), unacked };
             idx.replace(existing_request, connecting_client);
+            log_unacked = true;
+        }
+
+    } else {
+        // This is the first time we see the packet from this client. Let's
+        // record it.
+        ConnectingClient6 connecting_client{ duid->getData(), unacked };
+        idx.insert(connecting_client);
+        log_unacked = unacked;
+
+        if (!unacked) {
+            // This is the first time we see this client after getting into the
+            // communication interrupted state. But, this client hasn't been
+            // yet trying log enough to be considered unacked.
+            LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT6)
+                .arg(message->getLabel());
         }
     }
 
-    // This is the first time we see the packet from this client. Let's
-    // record it.
-    ConnectingClient6 connecting_client{ duid->getData(), unacked };
-    idx.insert(connecting_client);
+    if (log_unacked) {
+        unsigned unacked_left = 0;
+        if (config_->getMaxUnackedClients() > getUnackedClientsCount()) {
+            unacked_left = config_->getMaxUnackedClients() > getUnackedClientsCount();
+        }
+        LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED)
+            .arg(message->getLabel())
+            .arg(getUnackedClientsCount())
+            .arg(unacked_left);
+    }
 }
 
 bool
index 5beb397150b4b7b1ceb5fea312165fa18a54f9cd..3167aaf4a76df94281e432c92d4c7c7c08eeca7f 100644 (file)
@@ -1,4 +1,4 @@
-// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Thu Apr 30 2020 12:21
+// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon May 18 2020 11:51
 
 #include <cstddef>
 #include <log/message_types.h>
@@ -16,6 +16,11 @@ extern const isc::log::MessageID HA_BUFFER6_RECEIVE_NOT_FOR_US = "HA_BUFFER6_REC
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED = "HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED";
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_UNPACK_FAILED = "HA_BUFFER6_RECEIVE_UNPACK_FAILED";
 extern const isc::log::MessageID HA_COMMAND_PROCESSED_FAILED = "HA_COMMAND_PROCESSED_FAILED";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED = "HA_COMMUNICATION_INTERRUPTED";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4 = "HA_COMMUNICATION_INTERRUPTED_CLIENT4";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED = "HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6 = "HA_COMMUNICATION_INTERRUPTED_CLIENT6";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED = "HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED";
 extern const isc::log::MessageID HA_CONFIGURATION_FAILED = "HA_CONFIGURATION_FAILED";
 extern const isc::log::MessageID HA_CONFIGURATION_SUCCESSFUL = "HA_CONFIGURATION_SUCCESSFUL";
 extern const isc::log::MessageID HA_CONFIG_AUTO_FAILOVER_DISABLED = "HA_CONFIG_AUTO_FAILOVER_DISABLED";
@@ -96,6 +101,11 @@ const char* values[] = {
     "HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED", "an error upacking an option, caused subsequent options to be skipped: %1",
     "HA_BUFFER6_RECEIVE_UNPACK_FAILED", "failed to parse query from %1 to %2, received over interface %3, reason: %4",
     "HA_COMMAND_PROCESSED_FAILED", "command_processed callout failed: %1",
+    "HA_COMMUNICATION_INTERRUPTED", "communication is interrupted and failover process has been started",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT4", "%1: new client attempting to get a lease from the partner",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED", "%1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT6", "%1: new client attempting to get a lease from the partner",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED", "%1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state",
     "HA_CONFIGURATION_FAILED", "failed to configure High Availability hooks library: %1",
     "HA_CONFIGURATION_SUCCESSFUL", "HA hook library has been successfully configured",
     "HA_CONFIG_AUTO_FAILOVER_DISABLED", "auto-failover disabled for %1",
index 89528421c50422316e996d97c2bf38f562a98f9c..ca6769b8d34ec897b626db656452373a8d800b38 100644 (file)
@@ -1,4 +1,4 @@
-// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Thu Apr 30 2020 12:21
+// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon May 18 2020 11:51
 
 #ifndef HA_MESSAGES_H
 #define HA_MESSAGES_H
@@ -17,6 +17,11 @@ extern const isc::log::MessageID HA_BUFFER6_RECEIVE_NOT_FOR_US;
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED;
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_UNPACK_FAILED;
 extern const isc::log::MessageID HA_COMMAND_PROCESSED_FAILED;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED;
 extern const isc::log::MessageID HA_CONFIGURATION_FAILED;
 extern const isc::log::MessageID HA_CONFIGURATION_SUCCESSFUL;
 extern const isc::log::MessageID HA_CONFIG_AUTO_FAILOVER_DISABLED;
index 951ae23b1e2fddf9769842948315c3e660879a56..4c8f757fb5df215c12ed9297d98e1e5caba4b7d9 100644 (file)
@@ -58,6 +58,53 @@ reason for failure.
 This error message is issued when the callout for the command_processed hook
 point failed. The argument contains a reason for the error.
 
+% HA_COMMUNICATION_INTERRUPTED communication is interrupted and failover process has been started
+This warning message is issued by the server which discovered that the
+communication to the active partner has been interrupted for a time
+period longer than the configured heartbeat-delay time. At this stage
+the server starts the failover procedure by monitoring the DHCP traffic
+sent to the partner and checking whether the partner server responds to
+this traffic. If the max-unacked-clients value is set to 0 such
+verification is disabled in which case the server will transition to
+the partner-down state.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT4 %1: new client attempting to get a lease from the partner
+This informational message is issued when the surviving server observes
+a DHCP packet sent to the partner with which the commuication is interrupted.
+The client which packet is observed is not yet considered "unacked" because
+the secs field value does not exceed the configured threshold specified
+with max-ack-delay.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED %1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state
+This informational message is issued when the surviving server determines
+that its partner failed to respond to the DHCP query and that this client
+is considered to not be served by the partner. The surviving server counts
+such clients and if the number of such clients exceeds the max-unacked-clients
+threshold, the server will transition to the partner-down state. The first
+argument contains client identification information. The second argument
+specifies the number of clients which the server failed to respond to. The
+third argument specifies the number of additional clients which, if not
+provisioned, will cause the server to transition to the partner-down state.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT6 %1: new client attempting to get a lease from the partner
+This informational message is issued when the surviving server observes
+a DHCP packet sent to the partner with which the commuication is interrupted.
+The client which packet is observed is not yet considered "unacked" because
+the elapsed time option value does not exceed the configured threshold
+specified with max-ack-delay. The sole argument specifies client
+identification information.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED %1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state
+This informational message is issued when the surviving server determines
+that its partner failed to respond to the DHCP query and that this client
+is considered to not be served by the partner. The surviving server counts
+such clients and if the number of such clients exceeds the max-unacked-clients
+threshold, the server will transition to the partner-down state. The first
+argument contains client identification information. The second argument
+specifies the number of clients which the server failed to respond to. The
+third argument specifies the number of additional clients which, if not
+provisioned, will cause the server to transition to the partner-down state.
+
 % HA_CONFIGURATION_FAILED failed to configure High Availability hooks library: %1
 This error message is issued when there is an error configuring the HA hooks
 library. The argument provides the detailed error message.
index e907b5320dc39bdb25d5d9a6bc5058ea33a784a4..a5ae0a93e89ee7d4344754895809d1db92216f13 100644 (file)
@@ -1349,6 +1349,10 @@ HAService::asyncSendHeartbeat() {
                 // We were unable to retrieve partner's state, so let's mark it
                 // as unavailable.
                 communication_state_->setPartnerState("unavailable");
+                // Log if the communication is interrupted.
+                if (communication_state_->isCommunicationInterrupted()) {
+                    LOG_WARN(ha_logger, HA_COMMUNICATION_INTERRUPTED);
+                }
             }
 
             // Whatever the result of the heartbeat was, the state machine needs