]> git.ipfire.org Git - thirdparty/kea.git/commitdiff
[#1258] Ported 1.7.8 changes to status-get
authorMarcin Siodelski <marcin@isc.org>
Thu, 14 May 2020 15:52:30 +0000 (17:52 +0200)
committerMarcin Siodelski <marcin@isc.org>
Tue, 23 Jun 2020 17:40:57 +0000 (19:40 +0200)
12 files changed:
doc/sphinx/api/status-get.json
doc/sphinx/arm/hooks-ha.rst
src/hooks/dhcp/high_availability/communication_state.cc
src/hooks/dhcp/high_availability/communication_state.h
src/hooks/dhcp/high_availability/ha_impl.cc
src/hooks/dhcp/high_availability/ha_messages.cc
src/hooks/dhcp/high_availability/ha_messages.h
src/hooks/dhcp/high_availability/ha_messages.mes
src/hooks/dhcp/high_availability/ha_service.cc
src/hooks/dhcp/high_availability/tests/communication_state_unittest.cc
src/hooks/dhcp/high_availability/tests/ha_impl_unittest.cc
src/hooks/dhcp/high_availability/tests/ha_service_unittest.cc

index 9fc6286ed798c4cea627e73074efa3ba56668e8a..83efa70bda76d0af811f9ba197c45e28d8da8c5c 100644 (file)
         "        \"pid\": <integer>,",
         "        \"uptime\": <uptime in seconds>,",
         "        \"reload\": <time since reload in seconds>,",
-        "        \"ha-servers\": {",
-        "            \"local\": {",
-        "                \"role\": <role of this server as in the configuration file>,",
-        "                \"scopes\": <list of scope names served by this server>,",
-        "                \"state\": <HA state name of the server receiving the command>,",
-        "            },",
-        "            \"remote\": {",
-        "                \"age\": <the age of the remote status in seconds>,",
-        "                \"in-touch\": <indicates if this server communicated with remote>,",
-        "                \"last-scopes\": <list of scopes served by partner>,",
-        "                \"last-state\": <HA state name of the partner>,",
-        "                \"role\": <partner role>",
+        "        \"high-availability\": [",
+        "            {",
+        "                \"ha-mode\": <HA mode configured for this relationship>,",
+        "                \"ha-servers\": {",
+        "                    \"local\": {",
+        "                        \"role\": <role of this server as in the configuration file>,",
+        "                        \"scopes\": <list of scope names served by this server>,",
+        "                        \"state\": <HA state name of the server receiving the command>,",
+        "                    },",
+        "                    \"remote\": {",
+        "                        \"age\": <the age of the remote status in seconds>,",
+        "                        \"in-touch\": <indicates if this server communicated with remote>,",
+        "                        \"last-scopes\": <list of scopes served by partner>,",
+        "                        \"last-state\": <HA state name of the partner>,",
+        "                        \"role\": <partner role>",
+        "                    }",
+        "                }",
         "            }",
-        "        }",
+        "        ]",
         "    }",
         "}"
     ],
index 7ff36646f0b6b4596c1adf1ac7c58d8f8118e72f..d3102f5bef3705855c74b3317a37b0f0f7f9ea02 100644 (file)
@@ -1243,23 +1243,48 @@ the HA status of two load balancing servers:
            "pid": 1234,
            "uptime": 3024,
            "reload": 1111,
-           "ha-servers": {
-               "local": {
-                   "role": "primary",
-                   "scopes": [ "server1" ],
-                   "state": "load-balancing"
-               },
-                "remote": {
-                   "age": 10,
-                   "in-touch": true,
-                   "role": "secondary",
-                   "last-scopes": [ "server2" ],
-                   "last-state": "load-balancing"
+           "high-availability": [
+               {
+                   "ha-mode": "load-balancing",
+                   "ha-servers": {
+                       "local": {
+                           "role": "primary",
+                           "scopes": [ "server1" ],
+                           "state": "load-balancing"
+                       },
+                       "remote": {
+                           "age": 10,
+                           "in-touch": true,
+                           "role": "secondary",
+                           "last-scopes": [ "server2" ],
+                           "last-state": "load-balancing",
+                           "communication-interrupted": true,
+                           "connecting-clients": 2,
+                           "unacked-clients": 1,
+                           "unacked-clients-left": 2,
+                           "analyzed-packets": 8
+                       }
+                   }
                }
-           }
+           ]
        }
    }
 
+The ``high-availability`` argument is a list which currently always comprises
+one element. There are plans to extend the HA implementation to facilitate
+multiple HA relationships for a single server instance. In that case, the
+returned list will comprise more elements, each describing the status of
+a different relationship in which the server participates. Currently, the
+list always holds the status of exactly one relationship.
+
+.. note::
+
+   In Kea 1.7.8 an incompatible change was introduced to the syntax of the
+   ``status-get`` response. Previously, the HA status for a single relationship
+   was returned within the ``arguments`` map. As of Kea 1.7.8, the returned status
+   is enclosed in the list as described above. Any existing code relying on the
+   previous syntax must be updated to work with the new Kea versions.
+
 
 The ``ha-servers`` map contains two structures: ``local`` and ``remote``. The former
 contains the status information of the server which received the command. The
@@ -1282,3 +1307,59 @@ send the ``status-get`` command to the partner server directly to check
 its current state. The ``age`` parameter specifies the number of seconds
 since the information from the partner was gathered (the age of this
 information).
+
+The ``communication-interrupted`` boolean value indicates whether the server
+receiving the ``status-get`` command (the local server) has been unable to
+communicate with the partner for longer than the duration specified as
+``max-response-delay``. In such a situation we say that active servers are
+in the communication interrupted state or that the communication between
+them is interrupted. At this point, the local server may start monitoring
+the DHCP traffic directed to the partner to see if the partner is
+responding to this traffic. More about the failover procedure can be found
+in :ref:`ha-load-balancing-config`.
+
+The ``connecting-clients``, ``unacked-clients``, ``unacked-clients-left``
+and ``analyzed-packets`` parameters have been introduced together with the
+``communication-interrupted`` parameter in the Kea 1.7.8 release and they
+convey useful information about the state of the DHCP traffic monitoring
+in the communication interrupted state. If the server leaves the
+communication interrupted state these parameters are all reset to 0.
+
+These parameters have the following meaning in the communication interrupted
+state:
+
+-  ``connecting-clients`` - number of different clients which have attempted
+   to get a lease from the remote server. The clients are differentiated by
+   their MAC address and client identifier (in DHCPv4) or DUID (in DHCPv6).
+   This number includes both "unacked" clients (for which the "secs" field or
+   "elapsed time" value exceeded ``max-ack-delay``) and the clients which
+   have not yet been considered "unacked".
+
+-  ``unacked-clients`` - number of different clients which have been considered
+   "unacked", i.e. the clients which have been trying to get a lease long
+   enough that the value of the "secs" field or "elapsed time" exceeded
+   ``max-ack-delay``.
+
+-  ``unacked-clients-left`` - number of additional clients which have to be
+   considered "unacked" before the server enters the partner-down state.
+   This value decreases when the ``unacked-clients`` value increases. The
+   local server will enter the ``partner-down`` state when this value
+   decreases to 0.
+
+-  ``analyzed-packets`` - total number of all packets directed to the partner
+   server and analyzed by the local server since entering the communication
+   interrupted state. It includes retransmissions from the same clients.
+
+Monitoring these values helps to predict when the local server will
+enter the partner-down state or why the server hasn't yet entered this
+state.
+
+The last parameter introduced in the Kea 1.7.8 release was the ``ha-mode``.
+It returns the HA mode of operation selected using the ``mode`` parameter
+in the configuration file. It can hold one of the following values:
+``load-balancing``, ``hot-standby`` or ``passive-backup``.
+
+The ``status-get`` response has the format described above only in the
+``load-balancing`` and ``hot-standby`` modes. In the ``passive-backup``
+mode the ``remote`` map is not included in the response because in this
+mode there is only one active server (local). The response comprises no
+information about the status of the backup servers.
index 8a1b15ff8d755e6b4bec57f75d1de2a99768c726..b92b9427951288e5d83732dc69363c07513259cf 100644 (file)
@@ -7,7 +7,9 @@
 #include <config.h>
 
 #include <communication_state.h>
+#include <ha_log.h>
 #include <ha_service_states.h>
+#include <cc/data.h>
 #include <exceptions/exceptions.h>
 #include <dhcp/dhcp4.h>
 #include <dhcp/dhcp6.h>
@@ -25,6 +27,7 @@ using namespace isc::asiolink;
 using namespace isc::data;
 using namespace isc::dhcp;
 using namespace isc::http;
+using namespace isc::log;
 using namespace boost::posix_time;
 
 namespace {
@@ -49,7 +52,7 @@ CommunicationState::CommunicationState(const IOServicePtr& io_service,
       poke_time_(boost::posix_time::microsec_clock::universal_time()),
       heartbeat_impl_(0), partner_state_(-1), partner_scopes_(),
       clock_skew_(0, 0, 0, 0), last_clock_skew_warn_(), my_time_at_skew_(),
-      partner_time_at_skew_() {
+      partner_time_at_skew_(), analyzed_messages_count_(0) {
 }
 
 CommunicationState::~CommunicationState() {
@@ -166,10 +169,11 @@ CommunicationState::poke() {
     // Set poke time to the current time.
     poke_time_ = boost::posix_time::microsec_clock::universal_time();
 
-    // If we have been tracking the unanswered DHCP messages directed to the
-    // partner, we need to clear any gathered information because the connection
+    // If we have been tracking the DHCP messages directed to the partner,
+    // we need to clear any gathered information because the connection
     // seems to be (re)established.
-    clearUnackedClients();
+    clearConnectingClients();
+    analyzed_messages_count_ = 0;
 
     if (timer_) {
         // Check the duration since last poke. If it is less than a second, we don't
@@ -200,6 +204,11 @@ CommunicationState::isCommunicationInterrupted() const {
     return (getDurationInMillisecs() > config_->getMaxResponseDelay());
 }
 
+size_t
+CommunicationState::getAnalyzedMessagesCount() const {
+    // The counter is incremented by analyzeMessage and zeroed by poke.
+    const size_t analyzed = analyzed_messages_count_;
+    return (analyzed);
+}
+
 bool
 CommunicationState::clockSkewShouldWarn() {
     // First check if the clock skew is beyond the threshold.
@@ -274,9 +283,47 @@ CommunicationState::logFormatClockSkew() const {
     return (os.str());
 }
 
+ElementPtr
+CommunicationState::getReport() const {
+    auto report = Element::createMap();
+
+    auto in_touch = (getPartnerState() > 0);
+    report->set("in-touch", Element::create(in_touch));
+
+    auto age = in_touch ? static_cast<long long int>(getDurationInMillisecs() / 1000) : 0;
+    report->set("age", Element::create(age));
+
+    try {
+        report->set("last-state", Element::create(stateToString(getPartnerState())));
+
+    } catch (...) {
+        report->set("last-state", Element::create(std::string()));
+    }
+
+    auto list = Element::createList();
+    for (auto scope : getPartnerScopes()) {
+        list->add(Element::create(scope));
+    }
+    report->set("last-scopes", list);
+    report->set("communication-interrupted",
+                Element::create(isCommunicationInterrupted()));
+    report->set("connecting-clients", Element::create(static_cast<long long>(getConnectingClientsCount())));
+    report->set("unacked-clients", Element::create(static_cast<long long>(getUnackedClientsCount())));
+
+    long long unacked_clients_left = 0;
+    if (isCommunicationInterrupted() && (config_->getMaxUnackedClients() > getUnackedClientsCount())) {
+        unacked_clients_left = static_cast<long long>(config_->getMaxUnackedClients() -
+                                                      getUnackedClientsCount());
+    }
+    report->set("unacked-clients-left", Element::create(unacked_clients_left));
+    report->set("analyzed-packets", Element::create(static_cast<long long>(getAnalyzedMessagesCount())));
+
+    return (report);
+}
+
 CommunicationState4::CommunicationState4(const IOServicePtr& io_service,
                                          const HAConfigPtr& config)
-    : CommunicationState(io_service, config), unacked_clients_() {
+    : CommunicationState(io_service, config), connecting_clients_() {
 }
 
 void
@@ -287,6 +334,8 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv4 message");
     }
 
+    ++analyzed_messages_count_;
+
     // Check value of the "secs" field by comparing it with the configured
     // threshold.
     uint16_t secs = msg->getSecs();
@@ -298,17 +347,11 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         secs = ((secs >> 8) | (secs << 8));
     }
 
-    // Check the value of the "secs" field. If it is below the threshold there
-    // is nothing to do. The "secs" field holds a value in seconds, hence we
-    // have to multiple by 1000 to get a value in milliseconds.
-    if (secs * 1000 <= config_->getMaxAckDelay()) {
-        return;
-    }
-
-    // The "secs" value is above the threshold so we should count it as unacked
-    // request, but we will first have to check if there is such request already
-    // recorded.
-    auto existing_requests = unacked_clients_.equal_range(msg->getHWAddr()->hwaddr_);
+    // Check the value of the "secs" field. The "secs" field holds a value in
+    // seconds, hence we have to multiply by 1000 to get a value in milliseconds.
+    // If the secs value is above the threshold, it means that the current
+    // client should be considered unacked.
+    auto unacked = (secs * 1000 > config_->getMaxAckDelay());
 
     // Client identifier will be stored together with the hardware address. It
     // may remain empty if the client hasn't specified it.
@@ -318,34 +361,75 @@ CommunicationState4::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         client_id = opt_client_id->getData();
     }
 
-    // Iterate over the requests we found so far and see if we have a match with
-    // the client identifier (this includes empty client identifiers).
-    for (auto r = existing_requests.first; r != existing_requests.second; ++r) {
-        if (r->second == client_id) {
-            // There is a match so we have already recorded this client as
-            // unacked.
-            return;
+    bool log_unacked = false;
+
+    // Check if the given client was already recorded.
+    auto& idx = connecting_clients_.get<0>();
+    auto existing_request = idx.find(boost::make_tuple(msg->getHWAddr()->hwaddr_, client_id));
+    if (existing_request != idx.end()) {
+        // If the client was recorded and was not considered unacked
+        // but it should be considered unacked as a result of processing
+        // this packet, let's update the recorded request to mark the
+        // client unacked.
+        if (!existing_request->unacked_ && unacked) {
+            ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
+            idx.replace(existing_request, connecting_client);
+            log_unacked = true;
+        }
+
+    } else {
+        // This is the first time we see the packet from this client. Let's
+        // record it.
+        ConnectingClient4 connecting_client{ msg->getHWAddr()->hwaddr_, client_id, unacked };
+        idx.insert(connecting_client);
+        log_unacked = unacked;
+
+        if (!unacked) {
+            // This is the first time we see this client after getting into the
+            // communication interrupted state. However, this client hasn't yet
+            // been trying long enough to be considered unacked.
+            LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT4)
+                .arg(message->getLabel());
         }
     }
 
-    // New unacked client detected, so record the required information.
-    unacked_clients_.insert(std::make_pair(msg->getHWAddr()->hwaddr_, client_id));
+    // Only log the first time we detect a client is unacked.
+    if (log_unacked) {
+        unsigned unacked_left = 0;
+        if (config_->getMaxUnackedClients() > getUnackedClientsCount()) {
+            unacked_left = config_->getMaxUnackedClients() > getUnackedClientsCount();
+        }
+        LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED)
+            .arg(message->getLabel())
+            .arg(getUnackedClientsCount())
+            .arg(unacked_left);
+    }
 }
 
 bool
 CommunicationState4::failureDetected() const {
     return ((config_->getMaxUnackedClients() == 0) ||
-            (unacked_clients_.size() > config_->getMaxUnackedClients()));
+            (getUnackedClientsCount() > config_->getMaxUnackedClients()));
+}
+
+size_t
+CommunicationState4::getConnectingClientsCount() const {
+    // Every recorded client is counted, whether or not it is unacked.
+    const size_t recorded = connecting_clients_.size();
+    return (recorded);
+}
+
+size_t
+CommunicationState4::getUnackedClientsCount() const {
+    // The second index of the container is keyed by the unacked flag, so
+    // counting the entries equal to true gives the number of unacked clients.
+    const auto& by_unacked_flag = connecting_clients_.get<1>();
+    return (by_unacked_flag.count(true));
 }
 
 void
-CommunicationState4::clearUnackedClients() {
-    unacked_clients_.clear();
+CommunicationState4::clearConnectingClients() {
+    connecting_clients_.clear();
 }
 
 CommunicationState6::CommunicationState6(const IOServicePtr& io_service,
                                          const HAConfigPtr& config)
-    : CommunicationState(io_service, config), unacked_clients_() {
+    : CommunicationState(io_service, config), connecting_clients_() {
 }
 
 void
@@ -356,32 +440,85 @@ CommunicationState6::analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message)
         isc_throw(BadValue, "DHCP message to be analyzed is not a DHCPv6 message");
     }
 
+    ++analyzed_messages_count_;
+
     // Check the value of the "elapsed time" option. If it is present and above
     // the threshold, the client is considered unacked. The "elapsed time" option
     // holds the time in 1/100 of a second, hence we have to multiply by 10 to get
     // a value in milliseconds.
     OptionUint16Ptr elapsed_time = boost::dynamic_pointer_cast<
         OptionUint16>(msg->getOption(D6O_ELAPSED_TIME));
-    if (!elapsed_time || elapsed_time->getValue() * 10 <= config_->getMaxAckDelay()) {
-        return;
-    }
+    auto unacked = (elapsed_time && elapsed_time->getValue() * 10 > config_->getMaxAckDelay());
 
     // Get the DUID of the client to see if it hasn't been recorded already.
     OptionPtr duid = msg->getOption(D6O_CLIENTID);
-    if (duid && unacked_clients_.count(duid->getData()) == 0) {
-        // New unacked client detected, so record the required information.
-        unacked_clients_.insert(duid->getData());
+    if (!duid) {
+        return;
+    }
+
+    bool log_unacked = false;
+
+    // Check if the given client was already recorded.
+    auto& idx = connecting_clients_.get<0>();
+    auto existing_request = idx.find(duid->getData());
+    if (existing_request != idx.end()) {
+        // If the client was recorded and was not considered unacked
+        // but it should be considered unacked as a result of processing
+        // this packet, let's update the recorded request to mark the
+        // client unacked.
+        if (!existing_request->unacked_ && unacked) {
+            ConnectingClient6 connecting_client{ duid->getData(), unacked };
+            idx.replace(existing_request, connecting_client);
+            log_unacked = true;
+        }
+
+    } else {
+        // This is the first time we see the packet from this client. Let's
+        // record it.
+        ConnectingClient6 connecting_client{ duid->getData(), unacked };
+        idx.insert(connecting_client);
+        log_unacked = unacked;
+
+        if (!unacked) {
+            // This is the first time we see this client after getting into the
+            // communication interrupted state. However, this client hasn't yet
+            // been trying long enough to be considered unacked.
+            LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT6)
+                .arg(message->getLabel());
+        }
+    }
+
+    // Only log the first time we detect a client is unacked.
+    if (log_unacked) {
+        unsigned unacked_left = 0;
+        if (config_->getMaxUnackedClients() > getUnackedClientsCount()) {
+            unacked_left = config_->getMaxUnackedClients() > getUnackedClientsCount();
+        }
+        LOG_INFO(ha_logger, HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED)
+            .arg(message->getLabel())
+            .arg(getUnackedClientsCount())
+            .arg(unacked_left);
     }
 }
 
 bool
 CommunicationState6::failureDetected() const {
     return ((config_->getMaxUnackedClients() == 0) ||
-            (unacked_clients_.size() > config_->getMaxUnackedClients()));
+            (getUnackedClientsCount() > config_->getMaxUnackedClients()));
+}
+
+size_t
+CommunicationState6::getConnectingClientsCount() const {
+    // Every recorded client is counted, whether or not it is unacked.
+    const size_t recorded = connecting_clients_.size();
+    return (recorded);
+}
+
+size_t
+CommunicationState6::getUnackedClientsCount() const {
+    // The second index of the container is keyed by the unacked flag, so
+    // counting the entries equal to true gives the number of unacked clients.
+    const auto& by_unacked_flag = connecting_clients_.get<1>();
+    return (by_unacked_flag.count(true));
 }
 
 void
-CommunicationState6::clearUnackedClients() {
-    unacked_clients_.clear();
+CommunicationState6::clearConnectingClients() {
+    connecting_clients_.clear();
 }
 
 } // end of namespace isc::ha
index 69771ecb751bccf5c28b60680e3a1695133504a3..0350fbca94a5ab3d2a6b3f9f42262cc08b15c226 100644 (file)
 #include <dhcp/pkt.h>
 #include <boost/date_time/posix_time/posix_time.hpp>
 #include <boost/function.hpp>
+#include <boost/multi_index_container.hpp>
+#include <boost/multi_index/composite_key.hpp>
+#include <boost/multi_index/hashed_index.hpp>
+#include <boost/multi_index/indexed_by.hpp>
+#include <boost/multi_index/member.hpp>
+#include <boost/multi_index/ordered_index.hpp>
 #include <boost/shared_ptr.hpp>
 #include <map>
 #include <set>
@@ -101,10 +107,16 @@ public:
     /// @throw BadValue if unsupported state value was provided.
     void setPartnerState(const std::string& state);
 
+    /// @brief Returns scopes served by the partner server.
+    ///
+    /// @return A set of scopes served by the partner.
     std::set<std::string> getPartnerScopes() const {
         return (partner_scopes_);
     }
 
+    /// @brief Sets partner scopes.
+    ///
+    /// @param new_scopes Partner scopes enclosed in a JSON list.
     void setPartnerScopes(data::ConstElementPtr new_scopes);
 
     /// @brief Starts recurring heartbeat (public interface).
@@ -192,6 +204,13 @@ public:
     /// this method.
     virtual void analyzeMessage(const boost::shared_ptr<dhcp::Pkt>& message) = 0;
 
+    /// @brief Returns the number of analyzed messages while being in the
+    /// communications interrupted state.
+    ///
+    /// @return Number of analyzed messages. It includes retransmissions by
+    /// the same clients.
+    size_t getAnalyzedMessagesCount() const;
+
     /// @brief Checks if the partner failure has been detected based
     /// on the DHCP traffic analysis.
     ///
@@ -208,10 +227,31 @@ public:
     /// otherwise.
     virtual bool failureDetected() const = 0;
 
+    /// @brief Returns the current number of clients which attempted
+    /// to get a lease from the partner server.
+    ///
+    /// The returned number is reset to 0 when the server successfully
+    /// establishes communication with the partner. The number is
+    /// incremented only in the communications interrupted case.
+    ///
+    /// @return The number of clients including unacked clients.
+    virtual size_t getConnectingClientsCount() const = 0;
+
+    /// @brief Returns the current number of clients which haven't got
+    /// the lease from the partner server.
+    ///
+    /// The returned number is reset to 0 when the server successfully
+    /// establishes communication with the partner. The number is
+    /// incremented only in the communications interrupted case.
+    ///
+    /// @return Number of unacked clients.
+    virtual size_t getUnackedClientsCount() const = 0;
+
 protected:
 
-    /// @brief Removes information about clients which the partner server
-    /// failed to respond to.
+    /// @brief Removes information about the clients the partner server
+    /// should respond to while communication with the partner was
+    /// interrupted.
     ///
     /// This information is cleared by the @c CommunicationState::poke.
     /// The derivations of this class must provide DHCPv4 and DHCPv6 specific
@@ -221,7 +261,7 @@ protected:
     /// procedure starts over.
     ///
     /// See @c CommunicationState::analyzeMessage for details.
-    virtual void clearUnackedClients() = 0;
+    virtual void clearConnectingClients() = 0;
 
 public:
 
@@ -295,6 +335,15 @@ public:
     /// @brief Returns current clock skew value in the logger friendly format.
     std::string logFormatClockSkew() const;
 
+    /// @brief Returns the report about current communication state.
+    ///
+    /// This function returns a JSON map describing the state of communication
+    /// with a partner. This report is included in the response to the
+    /// status-get command.
+    ///
+    /// @return JSON element holding the report.
+    data::ElementPtr getReport() const;
+
 protected:
 
     /// @brief Pointer to the common IO service instance.
@@ -335,6 +384,9 @@ protected:
 
     /// @brief Partner reported time when skew was calculated.
     boost::posix_time::ptime partner_time_at_skew_;
+
+    /// @brief Total number of analyzed messages to be responded to by the partner.
+    size_t analyzed_messages_count_;
 };
 
 /// @brief Type of the pointer to the @c CommunicationState object.
@@ -379,21 +431,71 @@ public:
     /// otherwise.
     virtual bool failureDetected() const;
 
-protected:
+    /// @brief Returns the current number of clients which attempted
+    /// to get a lease from the partner server.
+    ///
+    /// The returned number is reset to 0 when the server successfully
+    /// establishes communication with the partner. The number is
+    /// incremented only in the communications interrupted case.
+    ///
+    /// @return The number of clients including unacked clients.
+    virtual size_t getConnectingClientsCount() const;
 
-    /// @brief Removes information about clients which the partner server
-    /// failed to respond to.
+    /// @brief Returns the current number of clients which haven't gotten
+    /// a lease from the partner server.
     ///
-    /// See @c CommunicationState::analyzeMessage for details.
-    virtual void clearUnackedClients();
+    /// The returned number is reset to 0 when the server successfully
+    /// establishes communication with the partner. The number is
+    /// incremented only in the communications interrupted case.
+    ///
+    /// @return Number of unacked clients.
+    virtual size_t getUnackedClientsCount() const;
 
-    /// @brief Holds information about the clients which the partner server
-    /// failed to respond to.
+protected:
+
+    /// @brief Removes information about the clients the partner server
+    /// should respond to while communication with the partner was
+    /// interrupted.
     ///
-    /// The key of the multimap holds hardware addresses of the clients.
-    /// The value of the multimap holds client identifiers of the
-    /// clients. The client identifiers may be empty.
-    std::multimap<std::vector<uint8_t>, std::vector<uint8_t> > unacked_clients_;
+    /// See @c CommunicationState::analyzeMessage for details.
+    virtual void clearConnectingClients();
+
+    /// @brief Structure holding information about the client which has
+    /// sent the packet being analyzed.
+    struct ConnectingClient4 {
+        std::vector<uint8_t> hwaddr_;
+        std::vector<uint8_t> clientid_;
+        bool unacked_;
+    };
+
+    /// @brief Multi index container holding information about the clients
+    /// attempting to get leases from the partner server.
+    typedef boost::multi_index_container<
+        ConnectingClient4,
+        boost::multi_index::indexed_by<
+            // First index is a composite index which allows to find a client
+            // by the HW address/client identifier tuple.
+            boost::multi_index::hashed_unique<
+                boost::multi_index::composite_key<
+                    ConnectingClient4,
+                    boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>,
+                                               &ConnectingClient4::hwaddr_>,
+                    boost::multi_index::member<ConnectingClient4, std::vector<uint8_t>,
+                                               &ConnectingClient4::clientid_>
+                >
+            >,
+            // Second index allows for counting all clients which are
+            // considered unacked.
+            boost::multi_index::ordered_non_unique<
+                boost::multi_index::member<ConnectingClient4, bool, &ConnectingClient4::unacked_>
+            >
+        >
+    > ConnectingClients4;
+
+    /// @brief Holds information about the clients attempting to contact
+    /// the partner server while the servers are in communications
+    /// interrupted state.
+    ConnectingClients4 connecting_clients_;
 };
 
 /// @brief Pointer to the @c CommunicationState4 object.
@@ -431,19 +533,64 @@ public:
     /// otherwise.
     virtual bool failureDetected() const;
 
-protected:
+    /// @brief Returns the current number of clients which attempted
+    /// to get a lease from the partner server.
+    ///
+    /// The returned number is reset to 0 when the server successfully
+    /// establishes communication with the partner. The number is
+    /// incremented only in the communications interrupted case.
+    ///
+    /// @return The number of clients including unacked clients.
+    virtual size_t getConnectingClientsCount() const;
 
-    /// @brief Removes information about clients which the partner server
-    /// failed to respond to.
+    /// @brief Returns the current number of clients which haven't gotten
+    /// a lease from the partner server.
     ///
-    /// See @c CommunicationState::analyzeMessage for details.
-    virtual void clearUnackedClients();
+    /// The returned number is reset to 0 when the server successfully
+    /// establishes communication with the partner. The number is
+    /// incremented only in the communications interrupted case.
+    ///
+    /// @return Number of unacked clients.
+    virtual size_t getUnackedClientsCount() const;
 
-    /// @brief Holds information about the clients which the partner server
-    /// failed to respond to.
+protected:
+
+    /// @brief Removes information about the clients the partner server
+    /// should respond to while communication with the partner was
+    /// interrupted.
     ///
-    /// The value of the set holds DUIDs of the clients.
-    std::set<std::vector<uint8_t> > unacked_clients_;
+    /// See @c CommunicationState::analyzeMessage for details.
+    virtual void clearConnectingClients();
+
+    /// @brief Structure holding information about a client which
+    /// sent a packet being analyzed.
+    struct ConnectingClient6 {
+        std::vector<uint8_t> duid_;
+        bool unacked_;
+    };
+
+    /// @brief Multi index container holding information about the clients
+    /// attempting to get leases from the partner server.
+    typedef boost::multi_index_container<
+        ConnectingClient6,
+        boost::multi_index::indexed_by<
+            // First index is for accessing connecting clients by DUID.
+            boost::multi_index::hashed_unique<
+                boost::multi_index::member<ConnectingClient6, std::vector<uint8_t>,
+                                           &ConnectingClient6::duid_>
+            >,
+            // Second index allows for counting all clients which are
+            // considered unacked.
+            boost::multi_index::ordered_non_unique<
+                boost::multi_index::member<ConnectingClient6, bool, &ConnectingClient6::unacked_>
+            >
+        >
+    > ConnectingClients6;
+
+    /// @brief Holds information about the clients attempting to contact
+    /// the partner server while the servers are in communications
+    /// interrupted state.
+    ConnectingClients6 connecting_clients_;
 };
 
 /// @brief Pointer to the @c CommunicationState6 object.
index 8c1070982aaaba05480a73fcbb536efe308f5e88..51fc76c4f5c4ef7d1ad234fbc6f33d4e51d90ee3 100644 (file)
@@ -277,8 +277,17 @@ HAImpl::commandProcessed(hooks::CalloutHandle& callout_handle) {
         // Add the ha servers info to arguments.
         ElementPtr mutable_resp_args =
             boost::const_pointer_cast<Element>(resp_args);
+
+        /// @todo Today we support only one HA relationship per Kea server.
+        /// In the future there will be more of them. Therefore we enclose
+        /// our sole relationship in a list.
+        auto ha_relationships = Element::createList();
+        auto ha_relationship = Element::createMap();
         ConstElementPtr ha_servers = service_->processStatusGet();
-        mutable_resp_args->set("ha-servers", ha_servers);
+        ha_relationship->set("ha-servers", ha_servers);
+        ha_relationship->set("ha-mode", Element::create(HAConfig::HAModeToString(config_->getHAMode())));
+        ha_relationships->add(ha_relationship);
+        mutable_resp_args->set("high-availability", ha_relationships);
     }
 }
 
index 7baeab48ec758538a080fdcf69855cfe8cdf6f63..2213bdfd0e4c0f95e2320f9c19b44dac66d3effd 100644 (file)
@@ -1,4 +1,4 @@
-// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon Jul 08 2019 13:20
+// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon May 18 2020 16:44
 
 #include <cstddef>
 #include <log/message_types.h>
@@ -16,6 +16,11 @@ extern const isc::log::MessageID HA_BUFFER6_RECEIVE_NOT_FOR_US = "HA_BUFFER6_REC
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED = "HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED";
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_UNPACK_FAILED = "HA_BUFFER6_RECEIVE_UNPACK_FAILED";
 extern const isc::log::MessageID HA_COMMAND_PROCESSED_FAILED = "HA_COMMAND_PROCESSED_FAILED";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED = "HA_COMMUNICATION_INTERRUPTED";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4 = "HA_COMMUNICATION_INTERRUPTED_CLIENT4";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED = "HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6 = "HA_COMMUNICATION_INTERRUPTED_CLIENT6";
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED = "HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED";
 extern const isc::log::MessageID HA_CONFIGURATION_FAILED = "HA_CONFIGURATION_FAILED";
 extern const isc::log::MessageID HA_CONFIGURATION_SUCCESSFUL = "HA_CONFIGURATION_SUCCESSFUL";
 extern const isc::log::MessageID HA_CONFIG_AUTO_FAILOVER_DISABLED = "HA_CONFIG_AUTO_FAILOVER_DISABLED";
@@ -85,6 +90,11 @@ const char* values[] = {
     "HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED", "an error upacking an option, caused subsequent options to be skipped: %1",
     "HA_BUFFER6_RECEIVE_UNPACK_FAILED", "failed to parse query from %1 to %2, received over interface %3, reason: %4",
     "HA_COMMAND_PROCESSED_FAILED", "command_processed callout failed: %1",
+    "HA_COMMUNICATION_INTERRUPTED", "communication with %1 is interrupted",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT4", "%1: new client attempting to get a lease from the partner",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED", "%1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT6", "%1: new client attempting to get a lease from the partner",
+    "HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED", "%1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state",
     "HA_CONFIGURATION_FAILED", "failed to configure High Availability hooks library: %1",
     "HA_CONFIGURATION_SUCCESSFUL", "HA hook library has been successfully configured",
     "HA_CONFIG_AUTO_FAILOVER_DISABLED", "auto-failover disabled for %1",
index 4fc5168f3f1aaac046b6363de115d01e92dcb2dc..fcfc51ea4abf36a7258838aa1d5d617e4dce8d62 100644 (file)
@@ -1,4 +1,4 @@
-// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon Jul 08 2019 13:20
+// File created from ../../../../src/hooks/dhcp/high_availability/ha_messages.mes on Mon May 18 2020 16:44
 
 #ifndef HA_MESSAGES_H
 #define HA_MESSAGES_H
@@ -17,6 +17,11 @@ extern const isc::log::MessageID HA_BUFFER6_RECEIVE_NOT_FOR_US;
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_PACKET_OPTIONS_SKIPPED;
 extern const isc::log::MessageID HA_BUFFER6_RECEIVE_UNPACK_FAILED;
 extern const isc::log::MessageID HA_COMMAND_PROCESSED_FAILED;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6;
+extern const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED;
 extern const isc::log::MessageID HA_CONFIGURATION_FAILED;
 extern const isc::log::MessageID HA_CONFIGURATION_SUCCESSFUL;
 extern const isc::log::MessageID HA_CONFIG_AUTO_FAILOVER_DISABLED;
index 0fc4d703a8ac529ffe1b1a06b10aaa22e0855375..7626ae0d6ac32b37c5678986006980224fbbf998 100644 (file)
@@ -58,6 +58,53 @@ reason for failure.
 This error message is issued when the callout for the command_processed hook
 point failed. The argument contains a reason for the error.
 
+% HA_COMMUNICATION_INTERRUPTED communication with %1 is interrupted
+This warning message is issued by the server which discovered that the
+communication to the active partner has been interrupted for a time
+period longer than the configured heartbeat-delay time. At this stage
+the server starts the failover procedure by monitoring the DHCP traffic
+sent to the partner and checking whether the partner server responds to
+this traffic. If the max-unacked-clients value is set to 0, such
+verification is disabled, in which case the server will transition to
+the partner-down state.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT4 %1: new client attempting to get a lease from the partner
+This informational message is issued when the surviving server observes
+a DHCP packet sent to the partner with which the communication is interrupted.
+The client whose packet is observed is not yet considered "unacked" because
+the secs field value does not exceed the configured threshold specified with
+max-ack-delay. The sole argument specifies client identification information.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT4_UNACKED %1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state
+This informational message is issued when the surviving server determines
+that its partner failed to respond to the DHCP query and that this client
+is considered to not be served by the partner. The surviving server counts
+such clients and if the number of such clients exceeds the max-unacked-clients
+threshold, the server will transition to the partner-down state. The first
+argument contains client identification information. The second argument
+specifies the number of clients to which the partner server has failed to respond.
+The third argument specifies the number of additional clients which, if not
+provisioned, will cause the server to transition to the partner-down state.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT6 %1: new client attempting to get a lease from the partner
+This informational message is issued when the surviving server observes
+a DHCP packet sent to the partner with which the communication is interrupted.
+The client whose packet is observed is not yet considered "unacked" because
+the elapsed time option value does not exceed the configured threshold
+specified with max-ack-delay. The sole argument specifies client
+identification information.
+
+% HA_COMMUNICATION_INTERRUPTED_CLIENT6_UNACKED %1: partner server failed to respond, %2 clients unacked so far, %3 clients left before transitioning to the partner-down state
+This informational message is issued when the surviving server determines
+that its partner failed to respond to the DHCP query and that this client
+is considered to not be served by the partner. The surviving server counts
+such clients and if the number of such clients exceeds the max-unacked-clients
+threshold, the server will transition to the partner-down state. The first
+argument contains client identification information. The second argument
+specifies the number of clients to which the partner server has failed to respond.
+The third argument specifies the number of additional clients which, if not
+provisioned, will cause the server to transition to the partner-down state.
+
 % HA_CONFIGURATION_FAILED failed to configure High Availability hooks library: %1
 This error message is issued when there is an error configuring the HA hooks
 library. The argument provides the detailed error message.
index 36e0d14578eb98466d9130bea49bf96e16ca405f..43e721391838fd6de806e8f9f08750c7a80195a2 100644 (file)
@@ -978,19 +978,13 @@ HAService::processStatusGet() const {
     local->set("scopes", list);
     ha_servers->set("local", local);
 
-    // Remote part
-    ElementPtr remote = Element::createMap();
-
-    // Add the in-touch boolean flag to indicate whether there was any
-    // communication between the HA peers. Based on that, the user
-    // may determine if the status returned for the peer is based on
-    // the heartbeat or is to be determined.
-    auto in_touch = (communication_state_->getPartnerState() > 0);
-    remote->set("in-touch", Element::create(in_touch));
+    // Do not include remote server information if this is a backup server.
+    if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
+        return (ha_servers);
+    }
 
-    auto age = in_touch ?
-        static_cast<long long int>(communication_state_->getDurationInMillisecs() / 1000) : 0;
-    remote->set("age", Element::create(age));
+    // Remote part
+    ElementPtr remote = communication_state_->getReport();
 
     try {
         role = config_->getFailoverPeerConfig()->getRole();
@@ -1000,22 +994,6 @@ HAService::processStatusGet() const {
     } catch (...) {
         remote->set("role", Element::create(std::string()));
     }
-
-    try {
-        state = getPartnerState();
-        remote->set("last-state", Element::create(stateToString(state)));
-
-    } catch (...) {
-        remote->set("last-state", Element::create(std::string()));
-    }
-
-    // Remote server's scopes.
-    scopes = communication_state_->getPartnerScopes();
-    list = Element::createList();
-    for (auto scope : scopes) {
-        list->add(Element::create(scope));
-    }
-    remote->set("last-scopes", list);
     ha_servers->set("remote", remote);
 
     return (ha_servers);
@@ -1136,6 +1114,11 @@ HAService::asyncSendHeartbeat() {
                 // We were unable to retrieve partner's state, so let's mark it
                 // as unavailable.
                 communication_state_->setPartnerState("unavailable");
+                // Log if the communication is interrupted.
+                if (communication_state_->isCommunicationInterrupted()) {
+                    LOG_WARN(ha_logger, HA_COMMUNICATION_INTERRUPTED)
+                        .arg(partner_config->getName());
+                }
             }
 
             // Whatever the result of the heartbeat was, the state machine needs
index bce4d1a23c2116bde7a10a3dfeea397f98f359d1..6aefdaba4337b820d18ef2f837f3cd041464a461 100644 (file)
@@ -189,6 +189,9 @@ TEST_F(CommunicationStateTest, startHeartbeatInvalidValues) {
 TEST_F(CommunicationStateTest, detectFailureV4) {
     // Initially, there should be no unacked clients recorded.
     ASSERT_FALSE(state_.failureDetected());
+    EXPECT_EQ(0, state_.getUnackedClientsCount());
+    EXPECT_EQ(0, state_.getConnectingClientsCount());
+    EXPECT_EQ(0, state_.getAnalyzedMessagesCount());
 
     // The maximum number of unacked clients is 10. Let's provide 10
     // DHCPDISCOVER messages with the "secs" value of 15 which exceeds
@@ -207,6 +210,9 @@ TEST_F(CommunicationStateTest, detectFailureV4) {
             << "failure detected for the request number "
             << static_cast<int>(i);
     }
+    EXPECT_EQ(10, state_.getUnackedClientsCount());
+    EXPECT_EQ(10, state_.getConnectingClientsCount());
+    EXPECT_EQ(10, state_.getAnalyzedMessagesCount());
 
     // Let's provide similar set of requests but this time the "secs" field is
     // below the threshold. They should not be counted as failures. Also,
@@ -218,24 +224,36 @@ TEST_F(CommunicationStateTest, detectFailureV4) {
             << "failure detected for the request number "
             << static_cast<int>(i);
     }
+    EXPECT_EQ(10, state_.getUnackedClientsCount());
+    EXPECT_EQ(15, state_.getConnectingClientsCount());
+    EXPECT_EQ(20, state_.getAnalyzedMessagesCount());
 
     // Let's create a message from a new (not recorded yet) client with the
-    // "secs" field value below the threshold. It should not be recorded.
+    // "secs" field value below the threshold. It should not be counted as failure.
     ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 10, 10, 6)));
 
     // Still no failure.
     ASSERT_FALSE(state_.failureDetected());
+    EXPECT_EQ(10, state_.getUnackedClientsCount());
+    EXPECT_EQ(16, state_.getConnectingClientsCount());
+    EXPECT_EQ(21, state_.getAnalyzedMessagesCount());
 
     // Let's repeat one of the requests which already have been recorded as
     // unacked but with a greater value of "secs" field. This should not
     // be counted because only new clients count.
     ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 3, 3, 20)));
     ASSERT_FALSE(state_.failureDetected());
+    EXPECT_EQ(10, state_.getUnackedClientsCount());
+    EXPECT_EQ(16, state_.getConnectingClientsCount());
+    EXPECT_EQ(22, state_.getAnalyzedMessagesCount());
 
     // This time let's simulate a client with a MAC address already recorded but
     // with a client identifier. This should be counted as a new unacked request.
     ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 7, 7, 15)));
     ASSERT_TRUE(state_.failureDetected());
+    EXPECT_EQ(11, state_.getUnackedClientsCount());
+    EXPECT_EQ(16, state_.getConnectingClientsCount());
+    EXPECT_EQ(23, state_.getAnalyzedMessagesCount());
 
     // Poking should cause all counters to reset as it is an indication that the
     // control connection has been re-established.
@@ -243,6 +261,9 @@ TEST_F(CommunicationStateTest, detectFailureV4) {
 
     // We're back to no failure state.
     EXPECT_FALSE(state_.failureDetected());
+    EXPECT_EQ(0, state_.getUnackedClientsCount());
+    EXPECT_EQ(0, state_.getConnectingClientsCount());
+    EXPECT_EQ(0, state_.getAnalyzedMessagesCount());
 
     // Send 11 DHCPDISCOVER messages with the "secs" field bytes swapped. Swapping
     // bytes was reported for some misbehaving Windows clients. The server should
@@ -257,6 +278,9 @@ TEST_F(CommunicationStateTest, detectFailureV4) {
             << static_cast<int>(i)
             << " when testing swapped secs field bytes";
     }
+    EXPECT_EQ(0, state_.getUnackedClientsCount());
+    EXPECT_EQ(11, state_.getConnectingClientsCount());
+    EXPECT_EQ(11, state_.getAnalyzedMessagesCount());
 
     // Repeat the same test, but this time either the first byte exceeds the
     // secs threshold or the second byte is non-zero. All should be counted
@@ -275,6 +299,9 @@ TEST_F(CommunicationStateTest, detectFailureV4) {
     ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 11, 11,
                                                          0x30)));
     EXPECT_TRUE(state_.failureDetected());
+    EXPECT_EQ(11, state_.getUnackedClientsCount());
+    EXPECT_EQ(12, state_.getConnectingClientsCount());
+    EXPECT_EQ(22, state_.getAnalyzedMessagesCount());
 }
 
 // This test verifies that it is possible to disable analysis of the DHCPv4
@@ -289,6 +316,9 @@ TEST_F(CommunicationStateTest, failureDetectionDisabled4) {
 TEST_F(CommunicationStateTest, detectFailureV6) {
     // Initially, there should be no unacked clients recorded.
     ASSERT_FALSE(state6_.failureDetected());
+    EXPECT_EQ(0, state6_.getUnackedClientsCount());
+    EXPECT_EQ(0, state6_.getConnectingClientsCount());
+    EXPECT_EQ(0, state6_.getAnalyzedMessagesCount());
 
     // The maximum number of unacked clients is 10. Let's provide 10
     // Solicit messages with the "elapsed time" value of 1500 which exceeds
@@ -304,10 +334,13 @@ TEST_F(CommunicationStateTest, detectFailureV6) {
             << "failure detected for the request number "
             << static_cast<int>(i);
     }
+    EXPECT_EQ(10, state6_.getUnackedClientsCount());
+    EXPECT_EQ(10, state6_.getConnectingClientsCount());
+    EXPECT_EQ(10, state6_.getAnalyzedMessagesCount());
 
     // Let's provide similar set of requests but this time the "elapsed time" is
-    // below the threshold. They should not be counted as failures. Also,
-    // all of these requests have client identifier.
+    // below the threshold. This should not reduce the number of unacked or new
+    // clients.
     for (uint8_t i = 0; i < 10; ++i) {
         ASSERT_NO_THROW(state6_.analyzeMessage(createMessage6(DHCPV6_SOLICIT, i,
                                                              900)));
@@ -315,23 +348,35 @@ TEST_F(CommunicationStateTest, detectFailureV6) {
             << "failure detected for the request number "
             << static_cast<int>(i);
     }
+    EXPECT_EQ(10, state6_.getUnackedClientsCount());
+    EXPECT_EQ(10, state6_.getConnectingClientsCount());
+    EXPECT_EQ(20, state6_.getAnalyzedMessagesCount());
 
     // Let's create a message from a new (not recorded yet) client with the
-    // "elapsed time" value below the threshold. It should not be recorded.
+    // "elapsed time" value below the threshold. It should not count as failure.
     ASSERT_NO_THROW(state6_.analyzeMessage(createMessage6(DHCPV6_SOLICIT, 10, 600)));
 
     // Still no failure.
     ASSERT_FALSE(state6_.failureDetected());
+    EXPECT_EQ(10, state6_.getUnackedClientsCount());
+    EXPECT_EQ(11, state6_.getConnectingClientsCount());
+    EXPECT_EQ(21, state6_.getAnalyzedMessagesCount());
 
     // Let's repeat one of the requests which already have been recorded as
     // unacked but with a greater value of "elapsed time". This should not
     // be counted because only new clients count.
     ASSERT_NO_THROW(state6_.analyzeMessage(createMessage6(DHCPV6_SOLICIT, 3, 2000)));
     ASSERT_FALSE(state6_.failureDetected());
+    EXPECT_EQ(10, state6_.getUnackedClientsCount());
+    EXPECT_EQ(11, state6_.getConnectingClientsCount());
+    EXPECT_EQ(22, state6_.getAnalyzedMessagesCount());
 
-    // New unacked client should cause failure to the detected.
+    // New unacked client should cause failure to be detected.
     ASSERT_NO_THROW(state6_.analyzeMessage(createMessage6(DHCPV6_SOLICIT, 11, 1500)));
     ASSERT_TRUE(state6_.failureDetected());
+    EXPECT_EQ(11, state6_.getUnackedClientsCount());
+    EXPECT_EQ(12, state6_.getConnectingClientsCount());
+    EXPECT_EQ(23, state6_.getAnalyzedMessagesCount());
 
     // Poking should cause all counters to reset as it is an indication that the
     // control connection has been re-established.
@@ -339,6 +384,9 @@ TEST_F(CommunicationStateTest, detectFailureV6) {
 
     // We're back to no failure state.
     EXPECT_FALSE(state6_.failureDetected());
+    EXPECT_EQ(0, state6_.getUnackedClientsCount());
+    EXPECT_EQ(0, state6_.getConnectingClientsCount());
+    EXPECT_EQ(0, state6_.getAnalyzedMessagesCount());
 }
 
 // This test verifies that it is possible to disable analysis of the DHCPv6
@@ -446,4 +494,61 @@ TEST_F(CommunicationStateTest, logFormatClockSkew) {
     EXPECT_EQ(expected, log);
 }
 
+// Tests that the communication state report is correct.
+TEST_F(CommunicationStateTest, getReport) {
+    state_.setPartnerState("waiting");
+
+    auto scopes = Element::createList();
+    scopes->add(Element::create("server1"));
+    state_.setPartnerScopes(scopes);
+
+    state_.poke();
+
+    // Simulate the communications interrupted state.
+    state_.modifyPokeTime(-100);
+
+    // Send two DHCP packets of which one has secs value beyond the threshold and
+    // the other one lower than the threshold.
+    ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 0, 0, 5)));
+    ASSERT_NO_THROW(state_.analyzeMessage(createMessage4(DHCPDISCOVER, 1, 0, 15)));
+
+    // Get the report.
+    auto report = state_.getReport();
+    ASSERT_TRUE(report);
+
+    // Compare with the expected output.
+    std::string expected = "{"
+        "    \"age\": 100,"
+        "    \"in-touch\": true,"
+        "    \"last-scopes\": [ \"server1\" ],"
+        "    \"last-state\": \"waiting\","
+        "    \"communication-interrupted\": true,"
+        "    \"connecting-clients\": 2,"
+        "    \"unacked-clients\": 1,"
+        "    \"unacked-clients-left\": 9,"
+        "    \"analyzed-packets\": 2"
+        "}";
+    EXPECT_TRUE(isEquivalent(Element::fromJSON(expected), report));
+}
+
+// Tests that the report is correct when the state holds default values.
+TEST_F(CommunicationStateTest, getReportDefaultValues) {
+    auto report = state_.getReport();
+    ASSERT_TRUE(report);
+
+    // Compare with the expected output.
+    std::string expected = "{"
+        "    \"age\": 0,"
+        "    \"in-touch\": false,"
+        "    \"last-scopes\": [ ],"
+        "    \"last-state\": \"\","
+        "    \"communication-interrupted\": false,"
+        "    \"connecting-clients\": 0,"
+        "    \"unacked-clients\": 0,"
+        "    \"unacked-clients-left\": 0,"
+        "    \"analyzed-packets\": 0"
+        "}";
+    EXPECT_TRUE(isEquivalent(Element::fromJSON(expected), report));
+}
+
 }
index c2f3575725f808d197d5fde898ceb5f8646f9f54..7f3b8989d333a15828385fbe0ff272aec0a6ac64 100644 (file)
@@ -561,20 +561,30 @@ TEST_F(HAImplTest, statusGet) {
     std::string expected =
         "{"
         "    \"arguments\": {"
-        "        \"ha-servers\": {"
-        "            \"local\": {"
-        "                \"role\": \"primary\","
-        "                \"scopes\": [  ],"
-        "                \"state\": \"waiting\""
-        "            },"
-        "            \"remote\": {"
-        "                \"age\": 0,"
-        "                \"in-touch\": false,"
-        "                \"last-scopes\": [ ],"
-        "                \"last-state\": \"\","
-        "                \"role\": \"secondary\""
+        "        \"high-availability\": ["
+        "            {"
+        "                \"ha-mode\": \"load-balancing\","
+        "                \"ha-servers\": {"
+        "                    \"local\": {"
+        "                        \"role\": \"primary\","
+        "                        \"scopes\": [  ],"
+        "                        \"state\": \"waiting\""
+        "                    },"
+        "                    \"remote\": {"
+        "                        \"age\": 0,"
+        "                        \"in-touch\": false,"
+        "                        \"last-scopes\": [ ],"
+        "                        \"last-state\": \"\","
+        "                        \"role\": \"secondary\","
+        "                        \"communication-interrupted\": false,"
+        "                        \"connecting-clients\": 0,"
+        "                        \"unacked-clients\": 0,"
+        "                        \"unacked-clients-left\": 0,"
+        "                        \"analyzed-packets\": 0"
+        "                    }"
+        "                }"
         "            }"
-        "        },"
+        "        ],"
         "        \"pid\": 1"
         "    },"
         "    \"result\": 0"
@@ -582,5 +592,52 @@ TEST_F(HAImplTest, statusGet) {
     EXPECT_TRUE(isEquivalent(got, Element::fromJSON(expected)));
 }
 
+// Tests status-get command processed handler for backup server.
+TEST_F(HAImplTest, statusGetBackupServer) {
+    TestHAImpl ha_impl;
+    ASSERT_NO_THROW(ha_impl.configure(createValidJsonConfiguration()));
+    ha_impl.config_->setThisServerName("server3");
+
+    // Starting the service is required prior to running any callouts.
+    NetworkStatePtr network_state(new NetworkState(NetworkState::DHCPv4));
+    ASSERT_NO_THROW(ha_impl.startService(io_service_, network_state,
+                                         HAServerType::DHCPv4));
+
+    std::string name = "status-get";
+    ConstElementPtr response =
+        Element::fromJSON("{ \"arguments\": { \"pid\": 1 }, \"result\": 0 }");
+
+    CalloutHandlePtr callout_handle = HooksManager::createCalloutHandle();
+
+    callout_handle->setArgument("name", name);
+    callout_handle->setArgument("response", response);
+
+    ASSERT_NO_THROW(ha_impl.commandProcessed(*callout_handle));
+
+    ConstElementPtr got;
+    callout_handle->getArgument("response", got);
+    ASSERT_TRUE(got);
+
+    std::string expected =
+        "{"
+        "    \"arguments\": {"
+        "        \"high-availability\": ["
+        "            {"
+        "                \"ha-mode\": \"load-balancing\","
+        "                \"ha-servers\": {"
+        "                    \"local\": {"
+        "                        \"role\": \"backup\","
+        "                        \"scopes\": [  ],"
+        "                        \"state\": \"backup\""
+        "                    }"
+        "                }"
+        "            }"
+        "        ],"
+        "        \"pid\": 1"
+        "    },"
+        "    \"result\": 0"
+        "}";
+    EXPECT_TRUE(isEquivalent(got, Element::fromJSON(expected)));
+}
 
 }
index d3f09599da68f885055f6743a0dd4546cac38dde..2e9eb90b6fb70af899a4282c4f627fe09f6dbdbd 100644 (file)
@@ -1080,7 +1080,12 @@ TEST_F(HAServiceTest, hotStandbyScopeSelectionThisPrimary) {
         "        \"in-touch\": false,"
         "        \"role\": \"standby\","
         "        \"last-scopes\": [ ],"
-        "        \"last-state\": \"\""
+        "        \"last-state\": \"\","
+        "        \"communication-interrupted\": false,"
+        "        \"connecting-clients\": 0,"
+        "        \"unacked-clients\": 0,"
+        "        \"unacked-clients-left\": 0,"
+        "        \"analyzed-packets\": 0"
         "    }"
         "}";
     EXPECT_TRUE(isEquivalent(Element::fromJSON(expected), ha_servers));
@@ -1128,7 +1133,12 @@ TEST_F(HAServiceTest, hotStandbyScopeSelectionThisStandby) {
         "        \"in-touch\": false,"
         "        \"role\": \"primary\","
         "        \"last-scopes\": [ ],"
-        "        \"last-state\": \"\""
+        "        \"last-state\": \"\","
+        "        \"communication-interrupted\": false,"
+        "        \"connecting-clients\": 0,"
+        "        \"unacked-clients\": 0,"
+        "        \"unacked-clients-left\": 0,"
+        "        \"analyzed-packets\": 0"
         "    }"
         "}";
     EXPECT_TRUE(isEquivalent(Element::fromJSON(expected), ha_servers));
@@ -3124,7 +3134,12 @@ TEST_F(HAServiceStateMachineTest, waitingParterDownLoadBalancingPartnerDown) {
         "        \"in-touch\": true,"
         "        \"role\": \"secondary\","
         "        \"last-scopes\": [ \"server1\", \"server2\" ],"
-        "        \"last-state\": \"ready\""
+        "        \"last-state\": \"ready\","
+        "        \"communication-interrupted\": false,"
+        "        \"connecting-clients\": 0,"
+        "        \"unacked-clients\": 0,"
+        "        \"unacked-clients-left\": 0,"
+        "        \"analyzed-packets\": 0"
         "    }"
         "}";
     EXPECT_TRUE(isEquivalent(Element::fromJSON(expected), ha_servers));