From: Remi Gacogne Date: Thu, 20 Oct 2022 12:27:13 +0000 (+0200) Subject: dnsdist: Be more consistent in the naming of lazy health-check fields X-Git-Tag: dnsdist-1.8.0-rc1~271^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b80998094fc0cace3be4c36e98343794521c24ea;p=thirdparty%2Fpdns.git dnsdist: Be more consistent in the naming of lazy health-check fields --- diff --git a/pdns/dnsdist-lua.cc b/pdns/dnsdist-lua.cc index 2b35e948d3..758f928dbd 100644 --- a/pdns/dnsdist-lua.cc +++ b/pdns/dnsdist-lua.cc @@ -529,44 +529,44 @@ static void setupLuaConfig(LuaContext& luaCtx, bool client, bool configCheck) if (vars.count("lazyHealthCheckSampleSize")) { auto value = std::stoi(boost::get(vars.at("lazyHealthCheckSampleSize"))); checkParameterBound("lazyHealthCheckSampleSize", value); - config.d_lazyHealthChecksSampleSize = value; + config.d_lazyHealthCheckSampleSize = value; } if (vars.count("lazyHealthCheckMinSampleCount")) { auto value = std::stoi(boost::get(vars.at("lazyHealthCheckMinSampleCount"))); checkParameterBound("lazyHealthCheckMinSampleCount", value); - config.d_lazyHealthChecksMinSampleCount = value; + config.d_lazyHealthCheckMinSampleCount = value; } if (vars.count("lazyHealthCheckThreshold")) { auto value = std::stoi(boost::get(vars.at("lazyHealthCheckThreshold"))); checkParameterBound("lazyHealthCheckThreshold", value, std::numeric_limits::max()); - config.d_lazyHealthChecksThreshold = value; + config.d_lazyHealthCheckThreshold = value; } if (vars.count("lazyHealthCheckFailedInterval")) { auto value = std::stoi(boost::get(vars.at("lazyHealthCheckFailedInterval"))); checkParameterBound("lazyHealthCheckFailedInterval", value); - config.d_lazyHealthChecksFailedInterval = value; + config.d_lazyHealthCheckFailedInterval = value; } if (vars.count("lazyHealthCheckUseExponentialBackOff")) { - config.d_lazyHealthChecksUseExponentialBackOff = boost::get(vars.at("lazyHealthCheckUseExponentialBackOff")); + 
config.d_lazyHealthCheckUseExponentialBackOff = boost::get(vars.at("lazyHealthCheckUseExponentialBackOff")); } if (vars.count("lazyHealthCheckMaxBackOff")) { auto value = std::stoi(boost::get(vars.at("lazyHealthCheckMaxBackOff"))); checkParameterBound("lazyHealthCheckMaxBackOff", value); - config.d_lazyHealthChecksMaxBackOff = value; + config.d_lazyHealthCheckMaxBackOff = value; } if (vars.count("lazyHealthCheckMode")) { auto mode = boost::get(vars.at("lazyHealthCheckMode")); if (pdns_iequals(mode, "TimeoutOnly")) { - config.d_lazyHealthChecksMode = DownstreamState::LazyHealthCheckMode::TimeoutOnly; + config.d_lazyHealthCheckMode = DownstreamState::LazyHealthCheckMode::TimeoutOnly; } else if (pdns_iequals(mode, "TimeoutOrServFail")) { - config.d_lazyHealthChecksMode = DownstreamState::LazyHealthCheckMode::TimeoutOrServFail; + config.d_lazyHealthCheckMode = DownstreamState::LazyHealthCheckMode::TimeoutOrServFail; } else { warnlog("Ignoring unknown value '%s' for 'lazyHealthCheckMode' on 'newServer'", mode); diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index a129bab488..8bd2e80f89 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -807,12 +807,12 @@ struct DownstreamState: public std::enable_shared_from_this uint16_t d_retries{5}; uint16_t xpfRRCode{0}; uint16_t checkTimeout{1000}; /* in milliseconds */ - uint16_t d_lazyHealthChecksSampleSize{100}; - uint16_t d_lazyHealthChecksMinSampleCount{1}; - uint16_t d_lazyHealthChecksFailedInterval{30}; - uint16_t d_lazyHealthChecksMaxBackOff{3600}; - uint8_t d_lazyHealthChecksThreshold{20}; - LazyHealthCheckMode d_lazyHealthChecksMode{LazyHealthCheckMode::TimeoutOrServFail}; + uint16_t d_lazyHealthCheckSampleSize{100}; + uint16_t d_lazyHealthCheckMinSampleCount{1}; + uint16_t d_lazyHealthCheckFailedInterval{30}; + uint16_t d_lazyHealthCheckMaxBackOff{3600}; + uint8_t d_lazyHealthCheckThreshold{20}; + LazyHealthCheckMode d_lazyHealthCheckMode{LazyHealthCheckMode::TimeoutOrServFail}; uint8_t maxCheckFailures{1}; uint8_t 
minRiseSuccesses{1}; Availability availability{Availability::Auto}; @@ -828,7 +828,7 @@ struct DownstreamState: public std::enable_shared_from_this bool d_tcpCheck{false}; bool d_tcpOnly{false}; bool d_addXForwardedHeaders{false}; // for DoH backends - bool d_lazyHealthChecksUseExponentialBackOff{false}; + bool d_lazyHealthCheckUseExponentialBackOff{false}; bool d_upgradeToLazyHealthChecks{false}; }; @@ -948,7 +948,7 @@ public: } void setLazyAuto() { d_config.availability = Availability::Lazy; - d_lazyHealthCheckStats.lock()->d_lastResults.set_capacity(d_config.d_lazyHealthChecksSampleSize); + d_lazyHealthCheckStats.lock()->d_lastResults.set_capacity(d_config.d_lazyHealthCheckSampleSize); } bool healthCheckRequired(); diff --git a/pdns/dnsdistdist/dnsdist-backend.cc b/pdns/dnsdistdist/dnsdist-backend.cc index dd6886048f..c99d72101d 100644 --- a/pdns/dnsdistdist/dnsdist-backend.cc +++ b/pdns/dnsdistdist/dnsdist-backend.cc @@ -199,8 +199,8 @@ DownstreamState::DownstreamState(DownstreamState::Config&& config, std::shared_p setWeight(d_config.d_weight); } - if (d_config.availability == Availability::Lazy && d_config.d_lazyHealthChecksSampleSize > 0) { - d_lazyHealthCheckStats.lock()->d_lastResults.set_capacity(d_config.d_lazyHealthChecksSampleSize); + if (d_config.availability == Availability::Lazy && d_config.d_lazyHealthCheckSampleSize > 0) { + d_lazyHealthCheckStats.lock()->d_lastResults.set_capacity(d_config.d_lazyHealthCheckSampleSize); setUpStatus(true); } @@ -362,15 +362,15 @@ void DownstreamState::handleUDPTimeout(IDState& ids) void DownstreamState::reportResponse(uint8_t rcode) { - if (d_config.availability == Availability::Lazy && d_config.d_lazyHealthChecksSampleSize > 0) { - bool failure = d_config.d_lazyHealthChecksMode == LazyHealthCheckMode::TimeoutOrServFail ? 
rcode == RCode::ServFail : false; + if (d_config.availability == Availability::Lazy && d_config.d_lazyHealthCheckSampleSize > 0) { + bool failure = d_config.d_lazyHealthCheckMode == LazyHealthCheckMode::TimeoutOrServFail ? rcode == RCode::ServFail : false; d_lazyHealthCheckStats.lock()->d_lastResults.push_back(failure); } } void DownstreamState::reportTimeoutOrError() { - if (d_config.availability == Availability::Lazy && d_config.d_lazyHealthChecksSampleSize > 0) { + if (d_config.availability == Availability::Lazy && d_config.d_lazyHealthCheckSampleSize > 0) { d_lazyHealthCheckStats.lock()->d_lastResults.push_back(true); } } @@ -522,7 +522,7 @@ bool DownstreamState::healthCheckRequired() if (stats->d_status == LazyHealthCheckStats::LazyStatus::Healthy) { auto& lastResults = stats->d_lastResults; size_t totalCount = lastResults.size(); - if (totalCount < d_config.d_lazyHealthChecksMinSampleCount) { + if (totalCount < d_config.d_lazyHealthCheckMinSampleCount) { return false; } @@ -533,11 +533,11 @@ bool DownstreamState::healthCheckRequired() } } - const auto maxFailureRate = static_cast(d_config.d_lazyHealthChecksThreshold); + const auto maxFailureRate = static_cast(d_config.d_lazyHealthCheckThreshold); auto current = (100.0 * failures) / totalCount; if (current >= maxFailureRate) { lastResults.clear(); - vinfolog("Backend %s reached the lazy health-check threshold (%f out of %f, looking at sample of %d items with %d failures), moving to Potential Failure state", getNameWithAddr(), current, maxFailureRate, totalCount, failures); + vinfolog("Backend %s reached the lazy health-check threshold (%f%% out of %f%%, looking at sample of %d items with %d failures), moving to Potential Failure state", getNameWithAddr(), current, maxFailureRate, totalCount, failures); stats->d_status = LazyHealthCheckStats::LazyStatus::PotentialFailure; /* we update the next check time here because the check might time out, and we do not want to send a second check during that time unless @@ 
-572,26 +572,26 @@ time_t DownstreamState::getNextLazyHealthCheck() void DownstreamState::updateNextLazyHealthCheck(LazyHealthCheckStats& stats) { auto now = time(nullptr); - if (d_config.d_lazyHealthChecksUseExponentialBackOff) { + if (d_config.d_lazyHealthCheckUseExponentialBackOff) { if (stats.d_status == DownstreamState::LazyHealthCheckStats::LazyStatus::PotentialFailure) { /* we are still in the "up" state, we need to send the next query quickly to determine if the backend is really down */ - stats.d_nextCheck = now + d_config.d_lazyHealthChecksFailedInterval; + stats.d_nextCheck = now + d_config.d_lazyHealthCheckFailedInterval; } else if (consecutiveSuccessfulChecks > 0) { /* we are in 'Failed' state, but just had one (or more) successful check, so we want the next one to happen quite quickly as the backend might be available again. */ - stats.d_nextCheck = now + d_config.d_lazyHealthChecksFailedInterval; + stats.d_nextCheck = now + d_config.d_lazyHealthCheckFailedInterval; } else { const uint16_t failedTests = currentCheckFailures; size_t backOffCoeff = std::pow(2U, failedTests); - time_t backOff = d_config.d_lazyHealthChecksMaxBackOff; - if ((std::numeric_limits::max() / d_config.d_lazyHealthChecksFailedInterval) >= backOffCoeff) { - backOff = d_config.d_lazyHealthChecksFailedInterval * backOffCoeff; - if (backOff > d_config.d_lazyHealthChecksMaxBackOff || (std::numeric_limits::max() - now) <= backOff) { - backOff = d_config.d_lazyHealthChecksMaxBackOff; + time_t backOff = d_config.d_lazyHealthCheckMaxBackOff; + if ((std::numeric_limits::max() / d_config.d_lazyHealthCheckFailedInterval) >= backOffCoeff) { + backOff = d_config.d_lazyHealthCheckFailedInterval * backOffCoeff; + if (backOff > d_config.d_lazyHealthCheckMaxBackOff || (std::numeric_limits::max() - now) <= backOff) { + backOff = d_config.d_lazyHealthCheckMaxBackOff; } } @@ -599,7 +599,7 @@ void DownstreamState::updateNextLazyHealthCheck(LazyHealthCheckStats& stats) } } else { - stats.d_nextCheck = 
now + d_config.d_lazyHealthChecksFailedInterval; + stats.d_nextCheck = now + d_config.d_lazyHealthCheckFailedInterval; } } diff --git a/pdns/dnsdistdist/test-dnsdistbackend_cc.cc b/pdns/dnsdistdist/test-dnsdistbackend_cc.cc index 3e56c918a3..7d196ace4a 100644 --- a/pdns/dnsdistdist/test-dnsdistbackend_cc.cc +++ b/pdns/dnsdistdist/test-dnsdistbackend_cc.cc @@ -118,9 +118,9 @@ BOOST_AUTO_TEST_CASE(test_Lazy) DownstreamState::Config config; config.minRiseSuccesses = 5; config.maxCheckFailures = 3; - config.d_lazyHealthChecksMinSampleCount = 11; - config.d_lazyHealthChecksThreshold = 20; - config.d_lazyHealthChecksUseExponentialBackOff = false; + config.d_lazyHealthCheckMinSampleCount = 11; + config.d_lazyHealthCheckThreshold = 20; + config.d_lazyHealthCheckUseExponentialBackOff = false; config.availability = DownstreamState::Availability::Lazy; /* prevents a re-connection */ config.remote = ComboAddress("0.0.0.0"); @@ -142,7 +142,7 @@ BOOST_AUTO_TEST_CASE(test_Lazy) } /* the threshold should be reached (50% > 20%) but we do not have enough sample yet - (10 < config.d_lazyHealthChecksMinSampleCount) */ + (10 < config.d_lazyHealthCheckMinSampleCount) */ BOOST_CHECK_EQUAL(ds.isUp(), true); BOOST_CHECK_EQUAL(ds.getStatus(), "up"); BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); @@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(test_Lazy) BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); /* even if we fill the whole circular buffer with valid answers */ - for (size_t idx = 0; idx < config.d_lazyHealthChecksSampleSize; idx++) { + for (size_t idx = 0; idx < config.d_lazyHealthCheckSampleSize; idx++) { ds.reportResponse(RCode::NoError); } BOOST_CHECK_EQUAL(ds.isUp(), true); @@ -172,7 +172,7 @@ BOOST_AUTO_TEST_CASE(test_Lazy) BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); /* now let's reach the threshold again, this time just barely */ - for (size_t idx = 0; idx < config.d_lazyHealthChecksThreshold; idx++) { + for (size_t idx = 0; idx < config.d_lazyHealthCheckThreshold; 
idx++) { ds.reportTimeoutOrError(); } BOOST_CHECK_EQUAL(ds.isUp(), true); @@ -180,6 +180,7 @@ BOOST_AUTO_TEST_CASE(test_Lazy) BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); /* we need maxCheckFailures failed health-checks to go down */ + BOOST_REQUIRE(config.maxCheckFailures >= 1); for (size_t idx = 0; idx < static_cast(config.maxCheckFailures - 1); idx++) { ds.submitHealthCheckResult(false, false); } @@ -192,14 +193,15 @@ BOOST_AUTO_TEST_CASE(test_Lazy) /* now we are in Failed state */ BOOST_CHECK_EQUAL(ds.isUp(), false); BOOST_CHECK_EQUAL(ds.getStatus(), "down"); - BOOST_CHECK(ds.getNextLazyHealthCheck() == (failedCheckTime + config.d_lazyHealthChecksFailedInterval)); + BOOST_CHECK(ds.getNextLazyHealthCheck() == (failedCheckTime + config.d_lazyHealthCheckFailedInterval)); /* let fill the buffer with successes, it does not matter */ - for (size_t idx = 0; idx < config.d_lazyHealthChecksSampleSize; idx++) { + for (size_t idx = 0; idx < config.d_lazyHealthCheckSampleSize; idx++) { ds.reportResponse(RCode::NoError); } /* we need minRiseSuccesses successful health-checks to go up */ + BOOST_REQUIRE(config.minRiseSuccesses >= 1); for (size_t idx = 0; idx < static_cast(config.minRiseSuccesses - 1); idx++) { ds.submitHealthCheckResult(false, true); } @@ -217,11 +219,11 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff) DownstreamState::Config config; config.minRiseSuccesses = 5; config.maxCheckFailures = 3; - config.d_lazyHealthChecksMinSampleCount = 11; - config.d_lazyHealthChecksThreshold = 20; - config.d_lazyHealthChecksUseExponentialBackOff = true; - config.d_lazyHealthChecksMaxBackOff = 60; - config.d_lazyHealthChecksFailedInterval = 30; + config.d_lazyHealthCheckMinSampleCount = 11; + config.d_lazyHealthCheckThreshold = 20; + config.d_lazyHealthCheckUseExponentialBackOff = true; + config.d_lazyHealthCheckMaxBackOff = 60; + config.d_lazyHealthCheckFailedInterval = 30; config.availability = DownstreamState::Availability::Lazy; /* prevents a re-connection */ 
config.remote = ComboAddress("0.0.0.0"); @@ -233,7 +235,7 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff) BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); /* submit a few failed results */ - for (size_t idx = 0; idx < config.d_lazyHealthChecksMinSampleCount; idx++) { + for (size_t idx = 0; idx < config.d_lazyHealthCheckMinSampleCount; idx++) { ds.reportTimeoutOrError(); } BOOST_CHECK_EQUAL(ds.isUp(), true); @@ -244,6 +246,7 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff) BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); /* we need maxCheckFailures failed health-checks to go down */ + BOOST_REQUIRE(config.maxCheckFailures >= 1); for (size_t idx = 0; idx < static_cast(config.maxCheckFailures - 1); idx++) { ds.submitHealthCheckResult(false, false); } @@ -258,10 +261,11 @@ BOOST_AUTO_TEST_CASE(test_LazyExponentialBackOff) BOOST_CHECK_EQUAL(ds.getStatus(), "down"); BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); /* and the wait time between two checks will double every time a failure occurs */ - BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (failedCheckTime + (config.d_lazyHealthChecksFailedInterval * std::pow(2U, ds.currentCheckFailures)))); + BOOST_CHECK_EQUAL(ds.getNextLazyHealthCheck(), (failedCheckTime + (config.d_lazyHealthCheckFailedInterval * std::pow(2U, ds.currentCheckFailures)))); BOOST_CHECK_EQUAL(ds.currentCheckFailures, 0U); /* we need minRiseSuccesses successful health-checks to go up */ + BOOST_REQUIRE(config.minRiseSuccesses >= 1); for (size_t idx = 0; idx < static_cast(config.minRiseSuccesses - 1); idx++) { ds.submitHealthCheckResult(false, true); }