From: Otto Date: Mon, 8 Mar 2021 08:48:43 +0000 (+0100) Subject: Make the non-resolving cache even less aggressive and add a unit test. X-Git-Tag: rec-4.5.0-alpha3^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b5715cffd756c24eb126f370c96a7b865aab4528;p=thirdparty%2Fpdns.git Make the non-resolving cache even less aggressive and add a unit test. In particular, we clear the entry if we see a succeeding resolve. This means that we only a couple of failing resolves in a row will cause a ban. Also change to default non-resolving-ns-max-fails from 1 to 5. --- diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc index b9e79acb5b..27540384b0 100644 --- a/pdns/pdns_recursor.cc +++ b/pdns/pdns_recursor.cc @@ -5388,7 +5388,7 @@ int main(int argc, char **argv) ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60"; ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")=""; ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")=""; - ::arg().set("non-resolving-ns-max-fails", "Number of failed address resolves of a nameserver to start throttling it, 0 is disabled")="1"; + ::arg().set("non-resolving-ns-max-fails", "Number of failed address resolves of a nameserver to start throttling it, 0 is disabled")="5"; ::arg().set("non-resolving-ns-throttle-time", "Number of seconds the throttle a nameserver with a name failing to resolve")="60"; ::arg().set("hint-file", "If set, load root hints from this file")=""; diff --git a/pdns/recursordist/docs/settings.rst b/pdns/recursordist/docs/settings.rst index 4a76db041b..acdb30c667 100644 --- a/pdns/recursordist/docs/settings.rst +++ b/pdns/recursordist/docs/settings.rst @@ -1269,7 +1269,7 @@ Number of milliseconds to wait for a remote authoritative server to respond. .. versionadded:: 4.5.0 - Integer -- Default: 1 +- Default: 5 Number of failed address resolves of a nameserver name to start throttling it, 0 is disabled. Nameservers matching :ref:`setting-dont-throttle-names` will not be throttled. diff --git a/pdns/recursordist/test-syncres_cc.cc b/pdns/recursordist/test-syncres_cc.cc index 0fee7386f4..8704279759 100644 --- a/pdns/recursordist/test-syncres_cc.cc +++ b/pdns/recursordist/test-syncres_cc.cc @@ -176,6 +176,8 @@ void initSR(bool debug) SyncRes::clearDontQuery(); SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32")); SyncRes::s_qnameminimization = false; + SyncRes::s_nonresolvingnsmaxfails = 0; + SyncRes::s_nonresolvingnsthrottletime = 0; SyncRes::clearNSSpeeds(); BOOST_CHECK_EQUAL(SyncRes::getNSSpeedsSize(), 0U); @@ -185,6 +187,8 @@ void initSR(bool debug) BOOST_CHECK_EQUAL(SyncRes::getThrottledServersSize(), 0U); SyncRes::clearFailedServers(); BOOST_CHECK_EQUAL(SyncRes::getFailedServersSize(), 0U); + SyncRes::clearNonResolvingNS(); + BOOST_CHECK_EQUAL(SyncRes::getNonResolvingNSSize(), 0U); SyncRes::clearECSStats(); diff --git a/pdns/recursordist/test-syncres_cc2.cc b/pdns/recursordist/test-syncres_cc2.cc index 065adc5b80..8f2da0e7b9 100644 --- a/pdns/recursordist/test-syncres_cc2.cc +++ b/pdns/recursordist/test-syncres_cc2.cc @@ -162,6 +162,77 @@ BOOST_AUTO_TEST_CASE(test_glueless_referral_loop) BOOST_CHECK_EQUAL(queriesToNS, 16U); } +BOOST_AUTO_TEST_CASE(test_glueless_referral_loop_with_nonresolving) +{ + std::unique_ptr sr; + initSR(sr); + + // We only do v4, this avoids "beenthere" non-deterministic behavour. If we do both v4 and v6, there are multiple IPs + // per (root) nameserver, and the "beenthere" loop detection is influenced by the particular address family selected. + // To see the non-deterministic behaviour, uncomment the line below (you'll be seeing around 21-24 queries). + // See #9565 + SyncRes::s_doIPv6 = false; + + primeHints(); + + const DNSName target1("powerdns.com."); + const DNSName target2("powerdns.org."); + size_t queriesToNS = 0; + + sr->setAsyncCallback([target1, target2, &queriesToNS](const ComboAddress& ip, const DNSName& domain, int type, bool doTCP, bool sendRDQuery, int EDNS0Level, struct timeval* now, boost::optional& srcmask, boost::optional context, LWResult* res, bool* chained) { + queriesToNS++; + + if (isRootServer(ip)) { + setLWResult(res, 0, false, false, true); + + if (domain.isPartOf(DNSName("com."))) { + addRecordToLW(res, "com.", QType::NS, "a.gtld-servers.net.", DNSResourceRecord::AUTHORITY, 172800); + } + else if (domain.isPartOf(DNSName("org."))) { + addRecordToLW(res, "org.", QType::NS, "a.gtld-servers.net.", DNSResourceRecord::AUTHORITY, 172800); + } + else { + setLWResult(res, RCode::NXDomain, false, false, true); + return LWResult::Result::Success; + } + + addRecordToLW(res, "a.gtld-servers.net.", QType::A, "192.0.2.1", DNSResourceRecord::ADDITIONAL, 3600); + addRecordToLW(res, "a.gtld-servers.net.", QType::AAAA, "2001:DB8::1", DNSResourceRecord::ADDITIONAL, 3600); + return LWResult::Result::Success; + } + else if (ip == ComboAddress("192.0.2.1:53") || ip == ComboAddress("[2001:DB8::1]:53")) { + if (domain.isPartOf(target1)) { + setLWResult(res, 0, false, false, true); + addRecordToLW(res, "powerdns.com.", QType::NS, "ns1.powerdns.org.", DNSResourceRecord::AUTHORITY, 172800); + addRecordToLW(res, "powerdns.com.", QType::NS, "ns2.powerdns.org.", DNSResourceRecord::AUTHORITY, 172800); + return LWResult::Result::Success; + } + else if (domain.isPartOf(target2)) { + setLWResult(res, 0, false, false, true); + addRecordToLW(res, "powerdns.org.", QType::NS, "ns1.powerdns.com.", DNSResourceRecord::AUTHORITY, 172800); + addRecordToLW(res, "powerdns.org.", QType::NS, "ns2.powerdns.com.", DNSResourceRecord::AUTHORITY, 172800); + return LWResult::Result::Success; + } + setLWResult(res, RCode::NXDomain, false, false, true); + return LWResult::Result::Success; + } + else { + return LWResult::Result::Timeout; + } + }); + + SyncRes::s_nonresolvingnsmaxfails = 1; + SyncRes::s_nonresolvingnsthrottletime = 60; + + vector ret; + int res = sr->beginResolve(target1, QType(QType::A), QClass::IN, ret); + BOOST_CHECK_EQUAL(res, RCode::ServFail); + BOOST_REQUIRE_EQUAL(ret.size(), 0U); + // queriesToNS count varies due to shuffling + // But all NS from above should be recorded as failing + BOOST_CHECK_EQUAL(SyncRes::getNonResolvingNSSize(), 4U); +} + BOOST_AUTO_TEST_CASE(test_cname_qperq) { std::unique_ptr sr; diff --git a/pdns/syncres.cc b/pdns/syncres.cc index e1b1319596..d0d72c6e0b 100644 --- a/pdns/syncres.cc +++ b/pdns/syncres.cc @@ -2288,11 +2288,14 @@ vector SyncRes::retrieveAddressesForNS(const std::string& prefix, { vector result; - + size_t nonresolvingfails = 0; if (!tns->first.empty()) { - if (s_nonresolvingnsmaxfails > 0 && t_sstorage.nonresolving.value(tns->first) >= s_nonresolvingnsmaxfails) { - LOG(prefix<first<< " in non-resolving map, skipping"< 0) { + nonresolvingfails = t_sstorage.nonresolving.value(tns->first); + if (nonresolvingfails >= s_nonresolvingnsmaxfails) { + LOG(prefix<first<< " in non-resolving map, skipping"<first<< "' ("<<1+tns-rnameservers.begin()<<"/"<<(unsigned int)rnameservers.size()<<")"< SyncRes::retrieveAddressesForNS(const std::string& prefix, } throw ex; } - if (s_nonresolvingnsmaxfails > 0 && result.empty() && nretrieveAddressesForNS > oldnretrieveAddressesForNS) { - auto dontThrottleNames = g_dontThrottleNames.getLocal(); - if (!dontThrottleNames->check(tns->first)) { - t_sstorage.nonresolving.incr(tns->first, d_now); + if (s_nonresolvingnsmaxfails > 0 && nretrieveAddressesForNS > oldnretrieveAddressesForNS) { + if (result.empty()) { + auto dontThrottleNames = g_dontThrottleNames.getLocal(); + if (!dontThrottleNames->check(tns->first)) { + t_sstorage.nonresolving.incr(tns->first, d_now); + } + } + else if (nonresolvingfails > 0) { + // Succeeding resolve, clear memory of recent failures + t_sstorage.nonresolving.clear(tns->first); } } pierceDontQuery=false; diff --git a/pdns/syncres.hh b/pdns/syncres.hh index 4e867e7570..f537ee4b0d 100644 --- a/pdns/syncres.hh +++ b/pdns/syncres.hh @@ -548,10 +548,18 @@ public: { return t_sstorage.fails.size(); } + static uint64_t getNonResolvingNSSize() + { + return t_sstorage.nonresolving.size(); + } static void clearFailedServers() { t_sstorage.fails.clear(); } + static void clearNonResolvingNS() + { + t_sstorage.nonresolving.clear(); + } static void pruneFailedServers(time_t cutoff) { t_sstorage.fails.prune(cutoff);