git.ipfire.org Git - thirdparty/kea.git/commitdiff
[#103,!277] Add failure count for the CB fetch timer.
author Marcin Siodelski <marcin@isc.org>
Mon, 25 Mar 2019 19:31:39 +0000 (20:31 +0100)
committer Marcin Siodelski <marcin@isc.org>
Tue, 26 Mar 2019 07:08:57 +0000 (03:08 -0400)
src/bin/dhcp4/ctrl_dhcp4_srv.cc
src/bin/dhcp4/ctrl_dhcp4_srv.h
src/bin/dhcp4/dhcp4_messages.cc
src/bin/dhcp4/dhcp4_messages.h
src/bin/dhcp4/dhcp4_messages.mes
src/bin/dhcp4/tests/kea_controller_unittest.cc
src/lib/process/tests/cb_ctl_base_unittests.cc

index a86b1b73698612873c94a82bf3ecdf5442ed013b..e7e990325b6c2f27e5c03467e2b0fa942a5908da 100644 (file)
@@ -690,10 +690,12 @@ ControlledDhcpv4Srv::processConfig(isc::data::ConstElementPtr config) {
                 fetch_time = 1000 * fetch_time;
             }
 
+            boost::shared_ptr<unsigned> failure_count(new unsigned(0));
             TimerMgr::instance()->
                 registerTimer("Dhcp4CBFetchTimer",
                               boost::bind(&ControlledDhcpv4Srv::cbFetchUpdates,
-                                          server_, CfgMgr::instance().getStagingCfg()),
+                                          server_, CfgMgr::instance().getStagingCfg(),
+                                          failure_count),
                               fetch_time,
                               asiolink::IntervalTimer::ONE_SHOT);
             TimerMgr::instance()->setup("Dhcp4CBFetchTimer");
@@ -968,17 +970,27 @@ ControlledDhcpv4Srv::dbLostCallback(ReconnectCtlPtr db_reconnect_ctl) {
 }
 
 void
-ControlledDhcpv4Srv::cbFetchUpdates(const SrvConfigPtr& srv_cfg) {
+ControlledDhcpv4Srv::cbFetchUpdates(const SrvConfigPtr& srv_cfg,
+                                    boost::shared_ptr<unsigned> failure_count) {
     try {
         // The true value indicates that the server should not reconnect
         // to the configuration backends and should take into account
         // audit entries stored in the database since last fetch.
         server_->getCBControl()->databaseConfigFetch(srv_cfg,
                                                      CBControlDHCPv4::FetchMode::FETCH_UPDATE);
+        (*failure_count) = 0;
 
     } catch (const std::exception& ex) {
         LOG_ERROR(dhcp4_logger, DHCP4_CB_FETCH_UPDATES_FAIL)
             .arg(ex.what());
+
+        // We allow at most 10 consecutive failures after which we stop
+        // making further attempts to fetch the configuration updates.
+        // Let's return without re-scheduling the timer.
+        if (++(*failure_count) > 10) {
+            LOG_ERROR(dhcp4_logger, DHCP4_CB_FETCH_UPDATES_RETRIES_EXHAUSTED);
+            return;
+        }
     }
 
     // Reschedule the timer to fetch new updates or re-try if
index c078e898cde5c5aab05e188527b2b4f7e089c1bc..c3120af2402e018e1539972f86f1146b4ca9c7e7 100644 (file)
@@ -368,7 +368,11 @@ private:
     ///
     /// @param srv_cfg Server configuration holding the database credentials
     /// and server tag.
-    void cbFetchUpdates(const SrvConfigPtr& srv_cfg);
+    /// @param failure_count pointer to the consecutive failure counter; this
+    /// callback stops re-scheduling the timer once more than 10 consecutive
+    /// attempts to fetch the updates have failed.
+    void cbFetchUpdates(const SrvConfigPtr& srv_cfg,
+                        boost::shared_ptr<unsigned> failure_count);
 
     /// @brief Static pointer to the sole instance of the DHCP server.
     ///
index 0f65549719af67ffa24295ebd3d234cbe23920c8..97b4cc495d2877578f303189501504695f7340f8 100644 (file)
@@ -1,4 +1,4 @@
-// File created from ../../../src/bin/dhcp4/dhcp4_messages.mes on Wed Mar 20 2019 11:09
+// File created from ../../../src/bin/dhcp4/dhcp4_messages.mes on Mon Mar 25 2019 20:13
 
 #include <cstddef>
 #include <log/message_types.h>
@@ -14,6 +14,7 @@ extern const isc::log::MessageID DHCP4_BUFFER_RECEIVE_FAIL = "DHCP4_BUFFER_RECEI
 extern const isc::log::MessageID DHCP4_BUFFER_UNPACK = "DHCP4_BUFFER_UNPACK";
 extern const isc::log::MessageID DHCP4_BUFFER_WAIT_SIGNAL = "DHCP4_BUFFER_WAIT_SIGNAL";
 extern const isc::log::MessageID DHCP4_CB_FETCH_UPDATES_FAIL = "DHCP4_CB_FETCH_UPDATES_FAIL";
+extern const isc::log::MessageID DHCP4_CB_FETCH_UPDATES_RETRIES_EXHAUSTED = "DHCP4_CB_FETCH_UPDATES_RETRIES_EXHAUSTED";
 extern const isc::log::MessageID DHCP4_CLASS_ASSIGNED = "DHCP4_CLASS_ASSIGNED";
 extern const isc::log::MessageID DHCP4_CLASS_UNCONFIGURED = "DHCP4_CLASS_UNCONFIGURED";
 extern const isc::log::MessageID DHCP4_CLASS_UNDEFINED = "DHCP4_CLASS_UNDEFINED";
@@ -146,6 +147,7 @@ const char* values[] = {
     "DHCP4_BUFFER_UNPACK", "parsing buffer received from %1 to %2 over interface %3",
     "DHCP4_BUFFER_WAIT_SIGNAL", "signal received while waiting for next packet, next waiting signal is %1",
     "DHCP4_CB_FETCH_UPDATES_FAIL", "error on attempt to fetch configuration updates from the configuration backend(s): %1",
+    "DHCP4_CB_FETCH_UPDATES_RETRIES_EXHAUSTED", "maximum number of configuration fetch attempts: 10, has been exhausted without success",
     "DHCP4_CLASS_ASSIGNED", "%1: client packet has been assigned to the following class(es): %2",
     "DHCP4_CLASS_UNCONFIGURED", "%1: client packet belongs to an unconfigured class: %2",
     "DHCP4_CLASS_UNDEFINED", "required class %1 has no definition",
index 1168c531f1f06b44048f20b25a41dbf06681bef5..74f7e4080ffcdce5fb0a400ce7448f7754c2c0bf 100644 (file)
@@ -1,4 +1,4 @@
-// File created from ../../../src/bin/dhcp4/dhcp4_messages.mes on Wed Mar 20 2019 11:09
+// File created from ../../../src/bin/dhcp4/dhcp4_messages.mes on Mon Mar 25 2019 20:13
 
 #ifndef DHCP4_MESSAGES_H
 #define DHCP4_MESSAGES_H
@@ -15,6 +15,7 @@ extern const isc::log::MessageID DHCP4_BUFFER_RECEIVE_FAIL;
 extern const isc::log::MessageID DHCP4_BUFFER_UNPACK;
 extern const isc::log::MessageID DHCP4_BUFFER_WAIT_SIGNAL;
 extern const isc::log::MessageID DHCP4_CB_FETCH_UPDATES_FAIL;
+extern const isc::log::MessageID DHCP4_CB_FETCH_UPDATES_RETRIES_EXHAUSTED;
 extern const isc::log::MessageID DHCP4_CLASS_ASSIGNED;
 extern const isc::log::MessageID DHCP4_CLASS_UNCONFIGURED;
 extern const isc::log::MessageID DHCP4_CLASS_UNDEFINED;
index 2cac51e41d7e7a99c22df55395a4665e2aed095f..4d0bd3a0a8ee504359ab049196620542c5feb380 100644 (file)
@@ -53,6 +53,13 @@ The server will re-try according to the configured value of the
 config-fetch-wait-time parameter. The sole argument contains the
 reason for failure.
 
+% DHCP4_CB_FETCH_UPDATES_RETRIES_EXHAUSTED maximum number of configuration fetch attempts: 10, has been exhausted without success
+This error indicates that the server has made a number of unsuccessful
+attempts to fetch configuration updates from a configuration backend.
+The server will continue to operate but won't make any further attempts
+to fetch configuration updates. The administrator must fix the configuration
+in the database and reload (or restart) the server.
+
 % DHCP4_CLASS_ASSIGNED %1: client packet has been assigned to the following class(es): %2
 This debug message informs that incoming packet has been assigned to specified
 class or classes. This is a normal behavior and indicates successful operation.
index 9f107199941716ca07ec4e8aeef47dc8a22dcea5..51360cc67bcb76801d350b85539f440791d0bb6c 100644 (file)
@@ -259,7 +259,7 @@ public:
         EXPECT_EQ(1, cb_control->getDatabaseConfigFetchCalls());
 
 
-        if (config_wait_fetch_time > 0) {
+        if ((config_wait_fetch_time > 0) && (!throw_during_fetch)) {
             // If we're configured to run the timer, we expect that it was
             // invoked at least 3 times. This is sufficient to verify that
             // the timer was scheduled and that the timer continued to run
@@ -274,10 +274,21 @@ public:
             EXPECT_GE(cb_control->getDatabaseConfigFetchCalls(), 3);
 
         } else {
-            // If the server is not configured to schedule the timer,
-            // we should still have one fetch attempt recorded.
             ASSERT_NO_THROW(runTimersWithTimeout(srv->getIOService(), 500));
-            EXPECT_EQ(1, cb_control->getDatabaseConfigFetchCalls());
+
+            if (throw_during_fetch) {
+                // If we're simulating the failure condition the number
+                // of consecutive failures should not exceed 10. Therefore
+                // the number of recorded fetches should be 12. One at
+                // startup, 10 failures and one that causes the timer
+                // to stop.
+                EXPECT_EQ(12, cb_control->getDatabaseConfigFetchCalls());
+
+            } else {
+                // If the server is not configured to schedule the timer,
+                // we should still have one fetch attempt recorded.
+                EXPECT_EQ(1, cb_control->getDatabaseConfigFetchCalls());
+            }
         }
     }
 
index 97836af1016fc85f0d11004d0925f0b39b8a7efe..8d75946b2174f04cf26eeadb652c27efe2167244 100644 (file)
@@ -590,6 +590,8 @@ TEST_F(CBControlBaseTest, fetchNoUpdates) {
     ASSERT_EQ(0, cb_ctl_.getMergesNum());
 }
 
+// This test verifies that database config fetch failures are handled
+// gracefully.
 TEST_F(CBControlBaseTest, fetchFailure) {
     auto config_base = makeConfigBase("type=db1");