]> git.ipfire.org Git - thirdparty/kea.git/commitdiff
[#3513] add HA clock skew to status-get
authorAndrei Pavel <andrei@isc.org>
Wed, 21 Aug 2024 10:18:17 +0000 (13:18 +0300)
committerAndrei Pavel <andrei@isc.org>
Thu, 22 Aug 2024 14:22:45 +0000 (17:22 +0300)
doc/sphinx/arm/hooks-ha.rst
src/hooks/dhcp/high_availability/communication_state.cc
src/hooks/dhcp/high_availability/communication_state.h
src/hooks/dhcp/high_availability/ha_service.cc
src/share/api/status-get.json

index 24f0efe970304b8c412ef25c91d909e48ed8aaae..f61b7e8b93ee35569588384c1a652199654bedae 100644 (file)
@@ -2106,21 +2106,24 @@ the HA status of two ``load-balancing`` servers:
                        "local": {
                            "role": "primary",
                            "scopes": [ "server1" ],
+                           "server-name": "server1",
                            "state": "load-balancing",
-                           "server-name": "server1"
+                           "system-time": "2024-01-01 12:00:00"
                        },
                        "remote": {
                            "age": 10,
+                           "analyzed-packets": 8,
+                           "clock-skew": 0,
+                           "communication-interrupted": true,
+                           "connecting-clients": 2,
                            "in-touch": true,
-                           "role": "secondary",
                            "last-scopes": [ "server2" ],
                            "last-state": "load-balancing",
-                           "communication-interrupted": true,
-                           "connecting-clients": 2,
+                           "role": "secondary",
+                           "server-name": "server2",
+                           "system-time": "2024-01-01 12:00:00",
                            "unacked-clients": 1,
-                           "unacked-clients-left": 2,
-                           "analyzed-packets": 8,
-                           "server-name": "server2"
+                           "unacked-clients-left": 2
                        }
                    }
                }
@@ -2167,6 +2170,11 @@ server may start monitoring the DHCP traffic directed to the partner to see if
 the partner is responding to this traffic. More about the failover procedure can
 be found in :ref:`ha-load-balancing-config`.
 
+The ``system-time`` parameters hold the UTC time, in ``%Y-%m-%d %H:%M:%S`` format,
+of the local and remote nodes as of the last clock skew calculation. The
+``clock-skew`` parameter is available in the ``remote`` map and holds the partner's
+time minus the local time in seconds; it is negative when the partner is behind.
+
 The ``connecting-clients``, ``unacked-clients``, ``unacked-clients-left``, and
 ``analyzed-packets`` parameters were introduced along with the
 ``communication-interrupted`` parameter and they convey useful information about
index 39cc1ccbc4885e3ae6b6c2889f0d460bb1e3be76..a1f7771a47d31b14f46491564617a4856db3bc83 100644 (file)
@@ -535,12 +535,15 @@ CommunicationState::logFormatClockSkewInternal() const {
 
     // Note HttpTime resolution is only to seconds, so we use fractional
     // precision of zero when logging.
-    os << "my time: " << util::ptimeToText(my_time_at_skew_, 0)
-       << ", partner's time: " << util::ptimeToText(partner_time_at_skew_, 0)
+    os << "my time: " << ptimeToText(my_time_at_skew_, 0)
+       << ", partner's time: " << ptimeToText(partner_time_at_skew_, 0)
        << ", partner's clock is ";
 
-    // If negative clock skew, the partner's time is behind our time.
-    if (clock_skew_.is_negative()) {
+    if (clock_skew_.total_seconds() == 0) {
+        // Most common case.
+        os << "synchronized";
+    } else if (clock_skew_.is_negative()) {
+        // Partner's time is behind our time.
         os << clock_skew_.invert_sign().total_seconds() << "s behind";
     } else {
         // Partner's time is ahead of ours.
@@ -584,6 +587,8 @@ CommunicationState::getReport() const {
     }
     report->set("unacked-clients-left", Element::create(unacked_clients_left));
     report->set("analyzed-packets", Element::create(static_cast<long long>(getAnalyzedMessagesCount())));
+    report->set("system-time", Element::create(ptimeToText(getPartnerTimeAtSkew(), 0)));
+    report->set("clock-skew", Element::create(clock_skew_.total_seconds()));
 
     return (report);
 }
@@ -651,6 +656,24 @@ CommunicationState::setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_c
     partner_unsent_update_count_.second = unsent_update_count;
 }
 
+boost::posix_time::ptime
+CommunicationState::getMyTimeAtSkew() const {
+    if (my_time_at_skew_.is_not_a_date_time()) {
+        // No local time has been recorded yet: fall back to the current UTC time.
+        return boost::posix_time::microsec_clock::universal_time();
+    }
+    return my_time_at_skew_;
+}
+
+boost::posix_time::ptime
+CommunicationState::getPartnerTimeAtSkew() const {
+    if (partner_time_at_skew_.is_not_a_date_time()) {
+        // No partner time has been recorded yet: fall back to the current UTC time.
+        return boost::posix_time::microsec_clock::universal_time();
+    }
+    return partner_time_at_skew_;
+}
+
 CommunicationState4::CommunicationState4(const IOServicePtr& io_service,
                                          const HAConfigPtr& config)
     : CommunicationState(io_service, config), connecting_clients_(),
index 85100e286e5eec4f93a904b2540971dc40ba3e2a..c379c214ecbd5453b15bddcb617e6f7186247b44 100644 (file)
@@ -698,6 +698,23 @@ private:
     /// the partner.
     void setPartnerUnsentUpdateCountInternal(uint64_t unsent_update_count);
 
+public:
+    /// @brief Retrieves the time of the local node when skew was last calculated.
+    ///
+    /// Used in reporting to the user, which is why being lenient with corner cases is important.
+    /// That is why if the time was not initialized yet, it is approximated to the current time.
+    ///
+    /// @return Local time stored at the last skew calculation, or the current UTC time if unset.
+    boost::posix_time::ptime getMyTimeAtSkew() const;
+
+    /// @brief Retrieves the time of the partner node when skew was last calculated.
+    ///
+    /// Used in reporting to the user, which is why being lenient with corner cases is important.
+    /// That is why if the time was not initialized yet, it is approximated to the current time.
+    ///
+    /// @return Partner time stored at the last skew calculation, or the current UTC time if unset.
+    boost::posix_time::ptime getPartnerTimeAtSkew() const;
+
 protected:
     /// @brief Pointer to the common IO service instance.
     asiolink::IOServicePtr io_service_;
index b297858867d6f8d4ab62e59dfac300b74051f5c9..ec73e844288b48b82d5e4de976922bbcd2365415 100644 (file)
@@ -22,6 +22,7 @@
 #include <http/date_time.h>
 #include <http/response_json.h>
 #include <http/post_request_json.h>
+#include <util/boost_time_utils.h>
 #include <util/multi_threading_mgr.h>
 #include <util/stopwatch.h>
 #include <boost/pointer_cast.hpp>
@@ -1693,6 +1694,7 @@ HAService::processStatusGet() const {
     }
     local->set("scopes", list);
     local->set("server-name", Element::create(config_->getThisServerName()));
+    local->set("system-time", Element::create(ptimeToText(communication_state_->getMyTimeAtSkew(), 0)));
     ha_servers->set("local", local);
 
     // Do not include remote server information if this is a backup server or
@@ -3283,7 +3285,7 @@ HAService::clientCloseHandler(int tcp_native_fd) {
     if (tcp_native_fd >= 0) {
         IfaceMgr::instance().deleteExternalSocket(tcp_native_fd);
     }
-};
+}
 
 size_t
 HAService::pendingRequestSize() {
index d1f22ec70d6a76617ded8ccedfbf5580deca95da..89aa08661bc9b980d218f9b6590e251f1e6d36cc 100644 (file)
         "{",
         "    \"result\": <integer>,",
         "    \"arguments\": {",
-        "        \"pid\": <integer>,",
-        "        \"uptime\": <uptime in seconds>,",
-        "        \"reload\": <time since reload in seconds>,",
+        "        \"dhcp-state\": {",
+        "            \"disabled-by-db-connection\": false,",
+        "            \"disabled-by-local-command\": [],",
+        "            \"disabled-by-remote-command\": [],",
+        "            \"disabled-by-user\": false,",
+        "            \"globally-disabled\": false",
+        "        },",
+        "        \"extended-info-tables\": <whether relay information is held in lease tables>,",
         "        \"high-availability\": [",
         "            {",
         "                \"ha-mode\": <HA mode configured for this relationship>,",
         "                    \"local\": {",
         "                        \"role\": <role of this server as in the configuration file>,",
         "                        \"scopes\": <list of scope names served by this server>,",
-        "                        \"state\": <HA state name of the server receiving the command>",
+        "                        \"server-name\": <name of the local server>,",
+        "                        \"state\": <HA state name of the server receiving the command>,",
+        "                        \"system-time\": <system time in format '%Y-%m-%d %H:%M:%S' on UTC timezone>",
         "                    },",
         "                    \"remote\": {",
         "                        \"age\": <the age of the remote status in seconds>,",
-        "                        \"in-touch\": <indicates if this server communicated with remote>,",
+        "                        \"analyzed-packets\": <number of packets sent to the partner server since communication was interrupted>,",
+        "                        \"clock-skew\": <difference in seconds between local and partner server times>,",
+        "                        \"communication-interrupted\": <whether communication did not happen for more than max-response-delay milliseconds>,",
+        "                        \"connecting-clients\": <number of different clients getting a lease from partner>,",
+        "                        \"in-touch\": <indicates if this server communicated with partner>,",
         "                        \"last-scopes\": <list of scopes served by partner>,",
         "                        \"last-state\": <HA state name of the partner>,",
-        "                        \"role\": <partner role>"
+        "                        \"role\": <partner role>,",
+        "                        \"server-name\": <name of the partner server>,",
+        "                        \"system-time\": <system time in format '%Y-%m-%d %H:%M:%S' on UTC timezone>,",
+        "                        \"unacked-clients\": <number of unacked clients>,",
+        "                        \"unacked-clients-left\": <how many more clients have to be unacked before partner-down state>",
         "                    }",
         "                }",
         "            }",
         "        ],",
         "        \"multi-threading-enabled\": true,",
-        "        \"thread-pool-size\": 4,",
         "        \"packet-queue-size\": 64,",
         "        \"packet-queue-statistics\": [ 1.2, 2.3, 3.4 ],",
+        "        \"pid\": <integer>,",
+        "        \"reload\": <time since reload in seconds>,",
         "        \"sockets\": {",
-        "            \"errors\": [ <error received during the last attempt to open all sockets> ],",
+        "            \"errors\": <list of errors received during the last attempt to open all sockets; only appears when status is failed or retrying>,",
         "            \"status\": <ready, retrying, or failed>",
-        "        },",
-        "        \"dhcp-state\": {",
-        "            \"disabled-by-db-connection\": false,",
-        "            \"disabled-by-local-command\": [],",
-        "            \"disabled-by-remote-command\": [],",
-        "            \"disabled-by-user\": false,",
-        "            \"globally-disabled\": false",
-        "        }"
+        "        },",
+        "        \"thread-pool-size\": 4,",
+        "        \"uptime\": <uptime in seconds>",
         "    }",
         "}"
     ],