From 3efcb3b7e8e248f408461365f4dfdf54bef297d9 Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Fri, 18 Nov 2022 12:03:02 +0100 Subject: [PATCH] dnsdist: Test the exponential back-off delays --- pdns/dnsdist.hh | 4 ++-- pdns/dnsdistdist/dnsdist-backend.cc | 10 +++++----- pdns/dnsdistdist/test-dnsdistbackend_cc.cc | 20 +++++++++++++++----- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index d2b605ecfd..698aee212e 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -950,7 +950,7 @@ public: d_config.availability = Availability::Lazy; d_lazyHealthCheckStats.lock()->d_lastResults.set_capacity(d_config.d_lazyHealthCheckSampleSize); } - bool healthCheckRequired(); + bool healthCheckRequired(std::optional<time_t> currentTime = std::nullopt); const string& getName() const { return d_config.name; @@ -1058,7 +1058,7 @@ public: static bool s_randomizeIDs; private: void handleUDPTimeout(IDState& ids); - void updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled); + void updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled, std::optional<time_t> currentTime = std::nullopt); }; using servers_t = vector<std::shared_ptr<DownstreamState>>; diff --git a/pdns/dnsdistdist/dnsdist-backend.cc b/pdns/dnsdistdist/dnsdist-backend.cc index 0667093845..562cb8abe8 100644 --- a/pdns/dnsdistdist/dnsdist-backend.cc +++ b/pdns/dnsdistdist/dnsdist-backend.cc @@ -499,7 +499,7 @@ IDState* DownstreamState::getIDState(unsigned int& selectedID, int64_t& generati return ids; } -bool DownstreamState::healthCheckRequired() +bool DownstreamState::healthCheckRequired(std::optional<time_t> currentTime) { if (d_config.availability == DownstreamState::Availability::Lazy) { auto stats = d_lazyHealthCheckStats.lock(); @@ -508,13 +508,13 @@ bool DownstreamState::healthCheckRequired() return true; } if (stats->d_status == LazyHealthCheckStats::LazyStatus::Failed) { - auto now = time(nullptr); + auto now = currentTime ? 
*currentTime : time(nullptr); if (stats->d_nextCheck <= now) { /* we update the next check time here because the check might time out, and we do not want to send a second check during that time unless the timer is actually very short */ vinfolog("Sending health-check query for %s which is still in the Failed state", getNameWithAddr()); - updateNextLazyHealthCheck(*stats, true); + updateNextLazyHealthCheck(*stats, true, now); return true; } return false; @@ -569,9 +569,9 @@ time_t DownstreamState::getNextLazyHealthCheck() return stats->d_nextCheck; } -void DownstreamState::updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled) +void DownstreamState::updateNextLazyHealthCheck(LazyHealthCheckStats& stats, bool checkScheduled, std::optional<time_t> currentTime) { - auto now = time(nullptr); + auto now = currentTime ? * currentTime : time(nullptr); if (d_config.d_lazyHealthCheckUseExponentialBackOff) { if (stats.d_status == DownstreamState::LazyHealthCheckStats::LazyStatus::PotentialFailure) { /* we are still in the "up" state, we need to send the next query quickly to diff --git a/pdns/dnsdistdist/test-dnsdistbackend_cc.cc b/pdns/dnsdistdist/test-dnsdistbackend_cc.cc index 7d196ace4a..983f91289a 100644 --- a/pdns/dnsdistdist/test-dnsdistbackend_cc.cc +++ b/pdns/dnsdistdist/test-dnsdistbackend_cc.cc @@ -222,8 +222,8 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff) config.d_lazyHealthCheckMinSampleCount = 11; config.d_lazyHealthCheckThreshold = 20; config.d_lazyHealthCheckUseExponentialBackOff = true; - config.d_lazyHealthCheckMaxBackOff = 60; - config.d_lazyHealthCheckFailedInterval = 30; + config.d_lazyHealthCheckMaxBackOff = 600; + config.d_lazyHealthCheckFailedInterval = 15; config.availability = DownstreamState::Availability::Lazy; /* prevents a re-connection */ config.remote = ComboAddress("0.0.0.0"); @@ -253,17 +253,27 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff) BOOST_CHECK_EQUAL(ds.isUp(), true); BOOST_CHECK_EQUAL(ds.getStatus(), "up"); 
BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); - time_t failedCheckTime = time(nullptr); + time_t currentTime = time(nullptr); ds.submitHealthCheckResult(false, false); /* now we are in Failed state */ BOOST_CHECK_EQUAL(ds.isUp(), false); BOOST_CHECK_EQUAL(ds.getStatus(), "down"); - BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(currentTime), false); /* and the wait time between two checks will double every time a failure occurs */ - BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (failedCheckTime + (config.d_lazyHealthCheckFailedInterval * std::pow(2U, ds.currentCheckFailures)))); + BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (currentTime + (config.d_lazyHealthCheckFailedInterval * std::pow(2U, ds.currentCheckFailures)))); BOOST_CHECK_EQUAL(ds.currentCheckFailures, 0U); + /* so after 5 failures */ + const size_t nbFailures = 5; + for (size_t idx = 0; idx < nbFailures; idx++) { + currentTime = ds.getNextLazyHealthCheck(); + BOOST_CHECK(ds.healthCheckRequired(currentTime)); + ds.submitHealthCheckResult(false, false); + } + BOOST_CHECK_EQUAL(ds.currentCheckFailures, nbFailures); + BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (currentTime + (config.d_lazyHealthCheckFailedInterval * std::pow(2U, ds.currentCheckFailures)))); + /* we need minRiseSuccesses successful health-checks to go up */ BOOST_REQUIRE(config.minRiseSuccesses >= 1); for (size_t idx = 0; idx < static_cast<size_t>(config.minRiseSuccesses - 1); idx++) { -- 2.47.2