From: Otto Moerbeek Date: Wed, 1 Mar 2023 08:23:37 +0000 (+0100) Subject: Simplify serve-stale logic X-Git-Tag: rec-4.8.3~1^2~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7fa6100a579ef352a58cc1e9e2335d1833a386c8;p=thirdparty%2Fpdns.git Simplify serve-stale logic - No more special handling of ImmediateServFailException, they remain fatal (individual failure to contact an NS returns and does not throw) - Explicitly only look in cache on serve-stale iteration of loop --- diff --git a/pdns/recursordist/test-syncres_cc10.cc b/pdns/recursordist/test-syncres_cc10.cc index e416956328..dce931b9da 100644 --- a/pdns/recursordist/test-syncres_cc10.cc +++ b/pdns/recursordist/test-syncres_cc10.cc @@ -1565,7 +1565,7 @@ BOOST_AUTO_TEST_CASE(test_servestale_cname_to_nxdomain) BOOST_CHECK(ret[1].d_type == QType::A); BOOST_CHECK_EQUAL(ret[0].d_name, target); BOOST_CHECK_EQUAL(ret[1].d_name, DNSName("cname.powerdns.com")); - BOOST_CHECK_EQUAL(downCount, 4U); + BOOST_CHECK_EQUAL(downCount, 8U); BOOST_CHECK_EQUAL(lookupCount, 2U); // Again, no lookup as the record is marked stale @@ -1577,7 +1577,7 @@ BOOST_AUTO_TEST_CASE(test_servestale_cname_to_nxdomain) BOOST_CHECK(ret[1].d_type == QType::A); BOOST_CHECK_EQUAL(ret[0].d_name, target); BOOST_CHECK_EQUAL(ret[1].d_name, DNSName("cname.powerdns.com")); - BOOST_CHECK_EQUAL(downCount, 4U); + BOOST_CHECK_EQUAL(downCount, 8U); BOOST_CHECK_EQUAL(lookupCount, 2U); // Again, no lookup as the record is marked stale but as the TTL has passed a task should have been pushed @@ -1590,7 +1590,7 @@ BOOST_AUTO_TEST_CASE(test_servestale_cname_to_nxdomain) BOOST_CHECK(ret[1].d_type == QType::A); BOOST_CHECK_EQUAL(ret[0].d_name, target); BOOST_CHECK_EQUAL(ret[1].d_name, DNSName("cname.powerdns.com")); - BOOST_CHECK_EQUAL(downCount, 4U); + BOOST_CHECK_EQUAL(downCount, 8U); BOOST_CHECK_EQUAL(lookupCount, 2U); BOOST_REQUIRE_EQUAL(getTaskSize(), 2U); @@ -1613,7 +1613,7 @@ BOOST_AUTO_TEST_CASE(test_servestale_cname_to_nxdomain) BOOST_REQUIRE_EQUAL(ret.size(), 1U); BOOST_CHECK(ret[0].d_type == QType::SOA); BOOST_CHECK_EQUAL(ret[0].d_name, auth); - BOOST_CHECK_EQUAL(downCount, 4U); + BOOST_CHECK_EQUAL(downCount, 8U); BOOST_CHECK_EQUAL(lookupCount, 3U); // And again, result should come from cache @@ -1624,7 +1624,7 @@ BOOST_AUTO_TEST_CASE(test_servestale_cname_to_nxdomain) BOOST_REQUIRE_EQUAL(ret.size(), 1U); BOOST_CHECK(ret[0].d_type == QType::SOA); BOOST_CHECK_EQUAL(ret[0].d_name, auth); - BOOST_CHECK_EQUAL(downCount, 4U); + BOOST_CHECK_EQUAL(downCount, 8U); BOOST_CHECK_EQUAL(lookupCount, 3U); } diff --git a/pdns/syncres.cc b/pdns/syncres.cc index ad976bf51b..582e38a634 100644 --- a/pdns/syncres.cc +++ b/pdns/syncres.cc @@ -1807,227 +1807,220 @@ int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType qtyp const int iterations = !d_refresh && MemRecursorCache::s_maxServedStaleExtensions > 0 ? 2 : 1; for (int loop = 0; loop < iterations; loop++) { - if (loop == 1) { - d_serveStale = true; - } + d_serveStale = loop == 1; - // When we're not on the last iteration, a timeout is not fatal - const bool exceptionOnTimeout = loop == iterations - 1; - - try { - // This is a difficult way of expressing "this is a normal query", i.e. not getRootNS. - if(!(d_updatingRootNS && qtype.getCode()==QType::NS && qname.isRoot())) { - if(d_cacheonly) { // very limited OOB support - LWResult lwr; - LOG(prefix<end()) { - if(iter->second.isAuth()) { - ret.clear(); - d_wasOutOfBand = doOOBResolve(qname, qtype, ret, depth, res); - if (fromCache) - *fromCache = d_wasOutOfBand; - return res; - } - else if (considerforwards) { - const vector& servers = iter->second.d_servers; - const ComboAddress remoteIP = servers.front(); - LOG(prefix< nm; - bool chained = false; - // forwardes are "anonymous", so plug in an empty name; some day we might have a fancier config language... - auto resolveRet = asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, authname, qtype.getCode(), false, false, &d_now, nm, &lwr, &chained, DNSName()); - - d_totUsec += lwr.d_usec; - accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family); - ++g_stats.authRCode.at(lwr.d_rcode); - if (fromCache) - *fromCache = true; - - // filter out the good stuff from lwr.result() - if (resolveRet == LWResult::Result::Success) { - for(const auto& rec : lwr.d_records) { - if(rec.d_place == DNSResourceRecord::ANSWER) - ret.push_back(rec); - } - return 0; - } - else { - return RCode::ServFail; + // This is a difficult way of expressing "this is a normal query", i.e. not getRootNS. + if(!(d_updatingRootNS && qtype.getCode()==QType::NS && qname.isRoot())) { + if(d_cacheonly) { // very limited OOB support + LWResult lwr; + LOG(prefix<end()) { + if(iter->second.isAuth()) { + ret.clear(); + d_wasOutOfBand = doOOBResolve(qname, qtype, ret, depth, res); + if (fromCache) + *fromCache = d_wasOutOfBand; + return res; + } + else if (considerforwards) { + const vector& servers = iter->second.d_servers; + const ComboAddress remoteIP = servers.front(); + LOG(prefix< nm; + bool chained = false; + // forwardes are "anonymous", so plug in an empty name; some day we might have a fancier config language... + auto resolveRet = asyncresolveWrapper(remoteIP, d_doDNSSEC, qname, authname, qtype.getCode(), false, false, &d_now, nm, &lwr, &chained, DNSName()); + + d_totUsec += lwr.d_usec; + accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family); + ++g_stats.authRCode.at(lwr.d_rcode); + if (fromCache) + *fromCache = true; + + // filter out the good stuff from lwr.result() + if (resolveRet == LWResult::Result::Success) { + for(const auto& rec : lwr.d_records) { + if(rec.d_place == DNSResourceRecord::ANSWER) + ret.push_back(rec); } + return 0; + } + else { + return RCode::ServFail; } } } + } - DNSName authname(qname); - bool wasForwardedOrAuthZone = false; - bool wasAuthZone = false; - bool wasForwardRecurse = false; - domainmap_t::const_iterator iter = getBestAuthZone(&authname); - if(iter != t_sstorage.domainmap->end()) { - const auto& domain = iter->second; - wasForwardedOrAuthZone = true; + DNSName authname(qname); + bool wasForwardedOrAuthZone = false; + bool wasAuthZone = false; + bool wasForwardRecurse = false; + domainmap_t::const_iterator iter = getBestAuthZone(&authname); + if(iter != t_sstorage.domainmap->end()) { + const auto& domain = iter->second; + wasForwardedOrAuthZone = true; - if (domain.isAuth()) { - wasAuthZone = true; - } else if (domain.shouldRecurse()) { - wasForwardRecurse = true; - } + if (domain.isAuth()) { + wasAuthZone = true; + } else if (domain.shouldRecurse()) { + wasForwardRecurse = true; } + } - /* When we are looking for a DS, we want to the non-CNAME cache check first - because we can actually have a DS (from the parent zone) AND a CNAME (from - the child zone), and what we really want is the DS */ - if (qtype != QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, res, state, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed - d_wasOutOfBand = wasAuthZone; - // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we - // are in QM Step0) we might have a CNAME but not the corresponding target. - // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since - // we will get the records from the cache, resulting in a small overhead. - // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since - // RPZ rules will not be evaluated anymore (we already matched). - const bool stoppedByPolicyHit = d_appliedPolicy.wasHit(); - - if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) { - *fromCache = true; - } - /* Apply Post filtering policies */ - - if (d_wantsRPZ && !stoppedByPolicyHit) { - auto luaLocal = g_luaconfs.getLocal(); - if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { - mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); - bool done = false; - handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); - if (done && fromCache) { - *fromCache = true; - } + /* When we are looking for a DS, we want to the non-CNAME cache check first + because we can actually have a DS (from the parent zone) AND a CNAME (from + the child zone), and what we really want is the DS */ + if (qtype != QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, res, state, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed + d_wasOutOfBand = wasAuthZone; + // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we + // are in QM Step0) we might have a CNAME but not the corresponding target. + // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since + // we will get the records from the cache, resulting in a small overhead. + // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since + // RPZ rules will not be evaluated anymore (we already matched). + const bool stoppedByPolicyHit = d_appliedPolicy.wasHit(); + + if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) { + *fromCache = true; + } + /* Apply Post filtering policies */ + + if (d_wantsRPZ && !stoppedByPolicyHit) { + auto luaLocal = g_luaconfs.getLocal(); + if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { + mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); + bool done = false; + handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); + if (done && fromCache) { + *fromCache = true; } } - return res; } + return res; + } - if (doCacheCheck(qname, authname, wasForwardedOrAuthZone, wasAuthZone, wasForwardRecurse, qtype, ret, depth, res, state)) { - // we done - d_wasOutOfBand = wasAuthZone; - if (fromCache) { - *fromCache = true; - } + if (doCacheCheck(qname, authname, wasForwardedOrAuthZone, wasAuthZone, wasForwardRecurse, qtype, ret, depth, res, state)) { + // we done + d_wasOutOfBand = wasAuthZone; + if (fromCache) { + *fromCache = true; + } - if (d_wantsRPZ && !d_appliedPolicy.wasHit()) { - auto luaLocal = g_luaconfs.getLocal(); - if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { - mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); - bool done = false; - handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); - } + if (d_wantsRPZ && !d_appliedPolicy.wasHit()) { + auto luaLocal = g_luaconfs.getLocal(); + if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { + mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); + bool done = false; + handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); } - - return res; } - /* if we have not found a cached DS (or denial of), now is the time to look for a CNAME */ - if (qtype == QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, res, state, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed - d_wasOutOfBand = wasAuthZone; - // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we - // are in QM Step0) we might have a CNAME but not the corresponding target. - // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since - // we will get the records from the cache, resulting in a small overhead. - // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since - // RPZ rules will not be evaluated anymore (we already matched). - const bool stoppedByPolicyHit = d_appliedPolicy.wasHit(); - - if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) { - *fromCache = true; - } - /* Apply Post filtering policies */ - - if (d_wantsRPZ && !stoppedByPolicyHit) { - auto luaLocal = g_luaconfs.getLocal(); - if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { - mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); - bool done = false; - handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); - if (done && fromCache) { - *fromCache = true; - } + return res; + } + + /* if we have not found a cached DS (or denial of), now is the time to look for a CNAME */ + if (qtype == QType::DS && doCNAMECacheCheck(qname, qtype, ret, depth, res, state, wasAuthZone, wasForwardRecurse)) { // will reroute us if needed + d_wasOutOfBand = wasAuthZone; + // Here we have an issue. If we were prevented from going out to the network (cache-only was set, possibly because we + // are in QM Step0) we might have a CNAME but not the corresponding target. + // It means that we will sometimes go to the next steps when we are in fact done, but that's fine since + // we will get the records from the cache, resulting in a small overhead. + // This might be a real problem if we had a RPZ hit, though, because we do not want the processing to continue, since + // RPZ rules will not be evaluated anymore (we already matched). + const bool stoppedByPolicyHit = d_appliedPolicy.wasHit(); + + if (fromCache && (!d_cacheonly || stoppedByPolicyHit)) { + *fromCache = true; + } + /* Apply Post filtering policies */ + + if (d_wantsRPZ && !stoppedByPolicyHit) { + auto luaLocal = g_luaconfs.getLocal(); + if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { + mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); + bool done = false; + handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); + if (done && fromCache) { + *fromCache = true; } } - - return res; } - } - if (d_cacheonly) { - return 0; + return res; } + } - LOG(prefix<> fallBack; - { - auto lock = s_savedParentNSSet.lock(); - auto domainData= lock->find(subdomain); - if (domainData != lock->end() && domainData->d_nsAddresses.size() > 0) { - nsset.clear(); - // Build the nsset arg and fallBack data for the fallback doResolveAt() attempt - // Take a copy to be able to release the lock, NsSet is actually a map, go figure - for (const auto& ns : domainData->d_nsAddresses) { - nsset.emplace(ns.first, pair(std::vector(), false)); - fallBack.emplace(ns.first, ns.second); - } - } - } - if (fallBack.size() > 0) { - LOG(prefix<inc(subdomain); + // the two retries allow getBestNSNamesFromCache&co to reprime the root + // hints, in case they ever go missing + for(int tries=0;tries<2 && nsset.empty();++tries) { + subdomain=getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, beenthere); // pass beenthere to both occasions + } + + res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation, nullptr); + + if (res == -1 && s_save_parent_ns_set) { + // It did not work out, lets check if we have a saved parent NS set + map> fallBack; + { + auto lock = s_savedParentNSSet.lock(); + auto domainData= lock->find(subdomain); + if (domainData != lock->end() && domainData->d_nsAddresses.size() > 0) { + nsset.clear(); + // Build the nsset arg and fallBack data for the fallback doResolveAt() attempt + // Take a copy to be able to release the lock, NsSet is actually a map, go figure + for (const auto& ns : domainData->d_nsAddresses) { + nsset.emplace(ns.first, pair(std::vector(), false)); + fallBack.emplace(ns.first, ns.second); } } } - /* Apply Post filtering policies */ - if (d_wantsRPZ && !d_appliedPolicy.wasHit()) { - auto luaLocal = g_luaconfs.getLocal(); - if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { - mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); - bool done = false; - handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); + if (fallBack.size() > 0) { + LOG(prefix<inc(subdomain); } } - - if (!res) { - return 0; + } + /* Apply Post filtering policies */ + if (d_wantsRPZ && !d_appliedPolicy.wasHit()) { + auto luaLocal = g_luaconfs.getLocal(); + if (luaLocal->dfe.getPostPolicy(ret, d_discardedPolicies, d_appliedPolicy)) { + mergePolicyTags(d_policyTags, d_appliedPolicy.getTags()); + bool done = false; + handlePolicyHit(prefix, qname, qtype, ret, done, res, depth); } + } - LOG(prefix<= 0) { - break; - } + if (!res) { + return 0; } - catch (const ImmediateServFailException&) { - if (exceptionOnTimeout) { - throw; - } + + LOG(prefix<= 0) { + break; } } return res<0 ? RCode::ServFail : res;