git.ipfire.org Git - thirdparty/pdns.git/commitdiff
dnsdist: Test the exponential back-off delays 12195/head
author Remi Gacogne <remi.gacogne@powerdns.com>
Fri, 18 Nov 2022 11:03:02 +0000 (12:03 +0100)
committer Remi Gacogne <remi.gacogne@powerdns.com>
Fri, 18 Nov 2022 11:03:02 +0000 (12:03 +0100)
pdns/dnsdist.hh
pdns/dnsdistdist/dnsdist-backend.cc
pdns/dnsdistdist/test-dnsdistbackend_cc.cc

index d2b605ecfd7c9d95adfa2174c59309e4ef2bcb19..698aee212e5c398921f91a042230d59085bce929 100644 (file)
@@ -950,7 +950,7 @@ public:
     d_config.availability = Availability::Lazy;
     d_lazyHealthCheckStats.lock()->d_lastResults.set_capacity(d_config.d_lazyHealthCheckSampleSize);
   }
-  bool healthCheckRequired();
+  bool healthCheckRequired(std::optional<time_t> currentTime = std::nullopt);
 
   const string& getName() const {
     return d_config.name;
@@ -1058,7 +1058,7 @@ public:
   static bool s_randomizeIDs;
 private:
   void handleUDPTimeout(IDState& ids);
-  void updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled);
+  void updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled, std::optional<time_t> currentTime = std::nullopt);
 };
 using servers_t = vector<std::shared_ptr<DownstreamState>>;
 
index 066709384572d16be434862a825d1fb97a8bb3bc..562cb8abe84026f7be73a61583b63aaf7e6e7695 100644 (file)
@@ -499,7 +499,7 @@ IDState* DownstreamState::getIDState(unsigned int& selectedID, int64_t& generati
   return ids;
 }
 
-bool DownstreamState::healthCheckRequired()
+bool DownstreamState::healthCheckRequired(std::optional<time_t> currentTime)
 {
   if (d_config.availability == DownstreamState::Availability::Lazy) {
     auto stats = d_lazyHealthCheckStats.lock();
@@ -508,13 +508,13 @@ bool DownstreamState::healthCheckRequired()
       return true;
     }
     if (stats->d_status == LazyHealthCheckStats::LazyStatus::Failed) {
-      auto now = time(nullptr);
+      auto now = currentTime ? *currentTime : time(nullptr);
       if (stats->d_nextCheck <= now) {
         /* we update the next check time here because the check might time out,
            and we do not want to send a second check during that time unless
            the timer is actually very short */
         vinfolog("Sending health-check query for %s which is still in the Failed state", getNameWithAddr());
-        updateNextLazyHealthCheck(*stats, true);
+        updateNextLazyHealthCheck(*stats, true, now);
         return true;
       }
       return false;
@@ -569,9 +569,9 @@ time_t DownstreamState::getNextLazyHealthCheck()
   return stats->d_nextCheck;
 }
 
-void DownstreamState::updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled)
+void DownstreamState::updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled, std::optional<time_t> currentTime)
 {
-  auto now = time(nullptr);
+  auto now = currentTime ? * currentTime : time(nullptr);
   if (d_config.d_lazyHealthCheckUseExponentialBackOff) {
     if (stats.d_status == DownstreamState::LazyHealthCheckStats::LazyStatus::PotentialFailure) {
       /* we are still in the "up" state, we need to send the next query quickly to
index 7d196ace4a3776e9995d3de4019764601971ad23..983f91289ae70b720cc025bac59b01466763cd41 100644 (file)
@@ -222,8 +222,8 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff)
   config.d_lazyHealthCheckMinSampleCount = 11;
   config.d_lazyHealthCheckThreshold = 20;
   config.d_lazyHealthCheckUseExponentialBackOff = true;
-  config.d_lazyHealthCheckMaxBackOff = 60;
-  config.d_lazyHealthCheckFailedInterval = 30;
+  config.d_lazyHealthCheckMaxBackOff = 600;
+  config.d_lazyHealthCheckFailedInterval = 15;
   config.availability = DownstreamState::Availability::Lazy;
   /* prevents a re-connection */
   config.remote = ComboAddress("0.0.0.0");
@@ -253,17 +253,27 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff)
   BOOST_CHECK_EQUAL(ds.isUp(), true);
   BOOST_CHECK_EQUAL(ds.getStatus(), "up");
   BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true);
-  time_t failedCheckTime = time(nullptr);
+  time_t currentTime = time(nullptr);
   ds.submitHealthCheckResult(false, false);
 
   /* now we are in Failed state */
   BOOST_CHECK_EQUAL(ds.isUp(), false);
   BOOST_CHECK_EQUAL(ds.getStatus(), "down");
-  BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false);
+  BOOST_CHECK_EQUAL(ds.healthCheckRequired(currentTime), false);
   /* and the wait time between two checks will double every time a failure occurs */
-  BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (failedCheckTime + (config.d_lazyHealthCheckFailedInterval * std::pow(2U, ds.currentCheckFailures))));
+  BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (currentTime + (config.d_lazyHealthCheckFailedInterval * std::pow(2U, ds.currentCheckFailures))));
   BOOST_CHECK_EQUAL(ds.currentCheckFailures, 0U);
 
+  /* so after 5 failures */
+  const size_t nbFailures = 5;
+  for (size_t idx = 0; idx < nbFailures; idx++) {
+    currentTime = ds.getNextLazyHealthCheck();
+    BOOST_CHECK(ds.healthCheckRequired(currentTime));
+    ds.submitHealthCheckResult(false, false);
+  }
+  BOOST_CHECK_EQUAL(ds.currentCheckFailures, nbFailures);
+  BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (currentTime + (config.d_lazyHealthCheckFailedInterval * std::pow(2U, ds.currentCheckFailures))));
+
   /* we need minRiseSuccesses successful health-checks to go up */
   BOOST_REQUIRE(config.minRiseSuccesses >= 1);
   for (size_t idx = 0; idx < static_cast<size_t>(config.minRiseSuccesses - 1); idx++) {