From: Remi Gacogne Date: Thu, 6 Oct 2022 14:19:06 +0000 (+0200) Subject: dnsdist: Add unit tests for the 'lazy health-check' feature X-Git-Tag: dnsdist-1.8.0-rc1~271^2~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c212e66e91d97e06ad14a5971513f41babcd1131;p=thirdparty%2Fpdns.git dnsdist: Add unit tests for the 'lazy health-check' feature Fixing a bug in the process, as one does. --- diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index c9df37bc57..91a50e1776 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -1034,6 +1034,7 @@ public: void reportTimeoutOrError(); void reportResponse(uint8_t rcode); void submitHealthCheckResult(bool initial, bool newState); + time_t getNextLazyHealthCheck(); dnsdist::Protocol getProtocol() const { diff --git a/pdns/dnsdistdist/Makefile.am b/pdns/dnsdistdist/Makefile.am index 8ada6aa9da..97ca86feca 100644 --- a/pdns/dnsdistdist/Makefile.am +++ b/pdns/dnsdistdist/Makefile.am @@ -304,6 +304,7 @@ testrunner_SOURCES = \ test-dnsdist-connections-cache.cc \ test-dnsdist-dnsparser.cc \ test-dnsdist_cc.cc \ + test-dnsdistbackend_cc.cc \ test-dnsdistdynblocks_hh.cc \ test-dnsdistkvs_cc.cc \ test-dnsdistlbpolicies_cc.cc \ diff --git a/pdns/dnsdistdist/dnsdist-backend.cc b/pdns/dnsdistdist/dnsdist-backend.cc index 8ba61774fc..175db7f401 100644 --- a/pdns/dnsdistdist/dnsdist-backend.cc +++ b/pdns/dnsdistdist/dnsdist-backend.cc @@ -45,6 +45,10 @@ bool DownstreamState::reconnect() return false; } + if (IsAnyAddress(d_config.remote)) { + return true; + } + connected = false; for (auto& fd : sockets) { if (fd != -1) { @@ -56,40 +60,38 @@ bool DownstreamState::reconnect() close(fd); fd = -1; } - if (!IsAnyAddress(d_config.remote)) { - fd = SSocket(d_config.remote.sin4.sin_family, SOCK_DGRAM, 0); + fd = SSocket(d_config.remote.sin4.sin_family, SOCK_DGRAM, 0); #ifdef SO_BINDTODEVICE - if (!d_config.sourceItfName.empty()) { - int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, d_config.sourceItfName.c_str(), 
d_config.sourceItfName.length()); - if (res != 0) { - infolog("Error setting up the interface on backend socket '%s': %s", d_config.remote.toStringWithPort(), stringerror()); - } + if (!d_config.sourceItfName.empty()) { + int res = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, d_config.sourceItfName.c_str(), d_config.sourceItfName.length()); + if (res != 0) { + infolog("Error setting up the interface on backend socket '%s': %s", d_config.remote.toStringWithPort(), stringerror()); } + } #endif - if (!IsAnyAddress(d_config.sourceAddr)) { - SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); + if (!IsAnyAddress(d_config.sourceAddr)) { + SSetsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 1); #ifdef IP_BIND_ADDRESS_NO_PORT - if (d_config.ipBindAddrNoPort) { - SSetsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, 1); - } -#endif - SBind(fd, d_config.sourceAddr); + if (d_config.ipBindAddrNoPort) { + SSetsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, 1); } +#endif + SBind(fd, d_config.sourceAddr); + } - try { - SConnect(fd, d_config.remote); - if (sockets.size() > 1) { - (*mplexer.lock())->addReadFD(fd, [](int, boost::any) {}); - } - connected = true; - } - catch (const std::runtime_error& error) { - infolog("Error connecting to new server with address %s: %s", d_config.remote.toStringWithPort(), error.what()); - connected = false; - break; + try { + SConnect(fd, d_config.remote); + if (sockets.size() > 1) { + (*mplexer.lock())->addReadFD(fd, [](int, boost::any) {}); } + connected = true; + } + catch (const std::runtime_error& error) { + infolog("Error connecting to new server with address %s: %s", d_config.remote.toStringWithPort(), error.what()); + connected = false; + break; } } @@ -515,7 +517,7 @@ bool DownstreamState::healthCheckRequired() if (stats->d_status == LazyHealthCheckStats::LazyStatus::Healthy) { auto& lastResults = stats->d_lastResults; size_t totalCount = lastResults.size(); - if (totalCount >= d_config.d_lazyHealthChecksMinSampleCount) { + if (totalCount < 
d_config.d_lazyHealthChecksMinSampleCount) { return false; } @@ -552,10 +554,18 @@ bool DownstreamState::healthCheckRequired() return false; } +time_t DownstreamState::getNextLazyHealthCheck() +{ + auto stats = d_lazyHealthCheckStats.lock(); + return stats->d_nextCheck; +} + void DownstreamState::submitHealthCheckResult(bool initial, bool newState) { if (initial) { - warnlog("Marking downstream %s as '%s'", getNameWithAddr(), newState ? "up" : "down"); + if (!IsAnyAddress(d_config.remote)) { + infolog("Marking downstream %s as '%s'", getNameWithAddr(), newState ? "up" : "down"); + } setUpStatus(newState); return; } @@ -603,7 +613,9 @@ void DownstreamState::submitHealthCheckResult(bool initial, bool newState) } if (newState != upStatus) { - warnlog("Marking downstream %s as '%s'", getNameWithAddr(), newState ? "up" : "down"); + if (!IsAnyAddress(d_config.remote)) { + infolog("Marking downstream %s as '%s'", getNameWithAddr(), newState ? "up" : "down"); + } if (newState && !isTCPOnly() && (!connected || d_config.reconnectOnUp)) { newState = reconnect(); diff --git a/pdns/dnsdistdist/test-dnsdistbackend_cc.cc b/pdns/dnsdistdist/test-dnsdistbackend_cc.cc new file mode 100644 index 0000000000..878ffacf6c --- /dev/null +++ b/pdns/dnsdistdist/test-dnsdistbackend_cc.cc @@ -0,0 +1,214 @@ + +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_NO_MAIN + +#include <boost/test/unit_test.hpp> + +#include "dnsdist.hh" + +BOOST_AUTO_TEST_SUITE(dnsdistbackend_cc) + +BOOST_AUTO_TEST_CASE(test_Basic) +{ + DownstreamState::Config config; + DownstreamState ds(std::move(config), nullptr, false); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); + + ds.setUp(); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Up); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "UP"); + 
BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); + + ds.setDown(); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Down); + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "DOWN"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); + + ds.setAuto(); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); + + ds.submitHealthCheckResult(true, true); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); +} + +BOOST_AUTO_TEST_CASE(test_MaxCheckFailures) +{ + const size_t maxCheckFailures = 5; + DownstreamState::Config config; + config.maxCheckFailures = maxCheckFailures; + /* prevents a re-connection */ + config.remote = ComboAddress("0.0.0.0"); + + DownstreamState ds(std::move(config), nullptr, false); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + ds.setUpStatus(true); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + + for (size_t idx = 0; idx < maxCheckFailures - 1; idx++) { + ds.submitHealthCheckResult(false, false); + } + + /* four failed checks is not enough */ + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + + /* but five is */ + ds.submitHealthCheckResult(false, false); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); + + /* only one successful check is needed to go back up */ + ds.submitHealthCheckResult(false, true); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + 
BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); +} + +BOOST_AUTO_TEST_CASE(test_Rise) +{ + const size_t minRise = 5; + DownstreamState::Config config; + config.minRiseSuccesses = minRise; + /* prevents a re-connection */ + config.remote = ComboAddress("0.0.0.0"); + + DownstreamState ds(std::move(config), nullptr, false); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); + + for (size_t idx = 0; idx < minRise - 1; idx++) { + ds.submitHealthCheckResult(false, true); + } + + /* four successful checks is not enough */ + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); + + /* but five is */ + ds.submitHealthCheckResult(false, true); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + + /* only one failed check is needed to go back down */ + ds.submitHealthCheckResult(false, false); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Auto); + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); +} + +BOOST_AUTO_TEST_CASE(test_Lazy) +{ + DownstreamState::Config config; + config.minRiseSuccesses = 5; + config.maxCheckFailures = 3; + config.d_lazyHealthChecksMinSampleCount = 11; + config.d_lazyHealthChecksThreshold = 20; + config.availability = DownstreamState::Availability::Lazy; + /* prevents a re-connection */ + config.remote = ComboAddress("0.0.0.0"); + + DownstreamState ds(std::move(config), nullptr, false); + BOOST_CHECK(ds.d_config.availability == DownstreamState::Availability::Lazy); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); + + /* submit a few results, first 
successful ones */ + for (size_t idx = 0; idx < 5; idx++) { + ds.reportResponse(RCode::NoError); + } + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); + /* then failed ones */ + for (size_t idx = 0; idx < 5; idx++) { + ds.reportTimeoutOrError(); + } + + /* the threshold should be reached (50% > 20%) but we do not have enough samples yet + (10 < d_lazyHealthChecksMinSampleCount) */ + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); + + /* reporting one valid answer gets us to the minimum number of samples, + and we are still above the threshold */ + ds.reportResponse(RCode::NoError); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); + + /* we should be in Potential Failure mode now, and thus always returning true */ + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); + + /* even if we fill the whole circular buffer with valid answers (settings are read back from ds.d_config because 'config' was moved into ds) */ + for (size_t idx = 0; idx < ds.d_config.d_lazyHealthChecksSampleSize; idx++) { + ds.reportResponse(RCode::NoError); + } + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); + + /* if we submit at least one valid health-check, we go back to Healthy */ + ds.submitHealthCheckResult(false, true); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); + + /* now let's reach the threshold again, this time just barely */ + for (size_t idx = 0; idx < ds.d_config.d_lazyHealthChecksThreshold; idx++) { + ds.reportTimeoutOrError(); + } + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); + + /* we need maxCheckFailures failed health-checks to go down */ + for (size_t idx = 0; idx < ds.d_config.maxCheckFailures - 1; idx++) { + 
ds.submitHealthCheckResult(false, false); + } + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), true); + time_t failedCheckTime = time(nullptr); + ds.submitHealthCheckResult(false, false); + + /* now we are in Failed state */ + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); + BOOST_CHECK(ds.getNextLazyHealthCheck() == (failedCheckTime + ds.d_config.d_lazyHealthChecksFailedInterval)); + + /* let's fill the buffer with successes, it does not matter */ + for (size_t idx = 0; idx < ds.d_config.d_lazyHealthChecksSampleSize; idx++) { + ds.reportResponse(RCode::NoError); + } + + /* we need minRiseSuccesses successful health-checks to go back up */ + for (size_t idx = 0; idx < ds.d_config.minRiseSuccesses - 1; idx++) { + ds.submitHealthCheckResult(false, true); + } + BOOST_CHECK_EQUAL(ds.isUp(), false); + BOOST_CHECK_EQUAL(ds.getStatus(), "down"); + + ds.submitHealthCheckResult(false, true); + BOOST_CHECK_EQUAL(ds.isUp(), true); + BOOST_CHECK_EQUAL(ds.getStatus(), "up"); + BOOST_CHECK_EQUAL(ds.healthCheckRequired(), false); +} + +BOOST_AUTO_TEST_SUITE_END()