From: Otto Date: Tue, 19 Oct 2021 08:48:28 +0000 (+0200) Subject: Remember parent NS set, to be able to fallback to it if needed X-Git-Tag: rec-4.7.0-beta1~44^2~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4e45e5bc3d88a1f65dafd8700ce67d5fa3f8b33d;p=thirdparty%2Fpdns.git Remember parent NS set, to be able to fallback to it if needed When a non-auth NS set is replaced by an auth one, remember the non-auth one if it contains NS not in the auth set. Use that NS set later as a fallback if resolving fails. TODO: general validation of method, pruning the new table, tests, config switch? Should fix #10594 --- diff --git a/pdns/rec_channel_rec.cc b/pdns/rec_channel_rec.cc index 8ed79f3b10..4c124160e7 100644 --- a/pdns/rec_channel_rec.cc +++ b/pdns/rec_channel_rec.cc @@ -377,6 +377,11 @@ static uint64_t* pleaseDumpFailedServers(int fd) return new uint64_t(SyncRes::doDumpFailedServers(fd)); } +static uint64_t* pleaseDumpSavedParentNSSets(int fd) +{ + return new uint64_t(SyncRes::doDumpSavedParentNSSets(fd)); +} + static uint64_t* pleaseDumpNonResolvingNS(int fd) { return new uint64_t(SyncRes::doDumpNonResolvingNS(fd)); @@ -1904,6 +1909,8 @@ RecursorControlChannel::Answer RecursorControlParser::getAnswer(int s, const str "dump-failedservers dump the failed servers to the named file\n" "dump-non-resolving dump non-resolving nameservers addresses to the named file\n" "dump-nsspeeds dump nsspeeds statistics to the named file\n" + "dump-saved-parent-ns-sets \n" + " dump saved parent ns sets that were used successfully as fallback\n" "dump-rpz dump the content of a RPZ zone to the named file\n" "dump-throttlemap dump the contents of the throttle map to the named file\n" "get [key1] [key2] .. get specific statistics\n" @@ -1980,6 +1987,9 @@ RecursorControlChannel::Answer RecursorControlParser::getAnswer(int s, const str if (cmd == "dump-failedservers") { return doDumpToFile(s, pleaseDumpFailedServers, cmd, false); } + if (cmd == "dump-saved-parent-ns-sets") { + return doDumpToFile(s, pleaseDumpSavedParentNSSets, cmd, false); + } if (cmd == "dump-rpz") { return doDumpRPZ(s, begin, end); } diff --git a/pdns/rec_control.cc b/pdns/rec_control.cc index fdb6817902..4b8d32b74e 100644 --- a/pdns/rec_control.cc +++ b/pdns/rec_control.cc @@ -95,7 +95,8 @@ int main(int argc, char** argv) "dump-failedservers", "dump-rpz", "dump-throttlemap", - "dump-non-resolving"}; + "dump-non-resolving", + "dump-saved-parent-ns-sets"}; try { initArguments(argc, argv); string sockname = "pdns_recursor"; diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc index 3ade92299f..0b1aa95889 100644 --- a/pdns/recursordist/rec-main.cc +++ b/pdns/recursordist/rec-main.cc @@ -881,7 +881,10 @@ static void doStats(void) << broadcastAccFunction(pleaseGetThrottleSize) << ", ns speeds: " << broadcastAccFunction(pleaseGetNsSpeedsSize) << ", failed ns: " << SyncRes::getFailedServersSize() << ", ednsmap: " - << broadcastAccFunction(pleaseGetEDNSStatusesSize) << endl; + << broadcastAccFunction(pleaseGetEDNSStatusesSize) << ", non-resolving: " + << SyncRes::getNonResolvingNSSize() << ", saved-parentsets: " + << SyncRes::getSaveParentsNSSetsSize() + << endl; g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries) << "%"; g_log << Logger::Notice << ", " << ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries) << "% throttled" << endl; g_log << Logger::Notice << "stats: " << SyncRes::s_tcpoutqueries << "/" << SyncRes::s_dotoutqueries << "/" << getCurrentIdleTCPConnections() << " outgoing tcp/dot/idle connections, " << broadcastAccFunction(pleaseGetConcurrentQueries) << " queries running, " << SyncRes::s_outgoingtimeouts << " outgoing timeouts " << endl; @@ -1924,6 +1927,11 @@ static void houseKeeping(void*) SyncRes::pruneNonResolving(now.tv_sec - SyncRes::s_nonresolvingnsthrottletime); }); + static PeriodicTask pruneSaveParentSetTask{"pruneSaveParentSetTask", 60}; + pruneSaveParentSetTask.runIfDue(now, [now]() { + SyncRes::pruneSaveParentsNSSets(now.tv_sec); + }); + // Divide by 12 to get the original 2 hour cycle if s_maxcachettl is default (1 day) static PeriodicTask rootUpdateTask{"rootUpdateTask", std::max(SyncRes::s_maxcachettl / 12, 10U)}; rootUpdateTask.runIfDue(now, [now]() { diff --git a/pdns/recursordist/test-syncres_cc.cc b/pdns/recursordist/test-syncres_cc.cc index 8f05f922e2..5eeb125c96 100644 --- a/pdns/recursordist/test-syncres_cc.cc +++ b/pdns/recursordist/test-syncres_cc.cc @@ -192,6 +192,8 @@ void initSR(bool debug) BOOST_CHECK_EQUAL(SyncRes::getFailedServersSize(), 0U); SyncRes::clearNonResolvingNS(); BOOST_CHECK_EQUAL(SyncRes::getNonResolvingNSSize(), 0U); + SyncRes::clearSaveParentsNSSets(); + BOOST_CHECK_EQUAL(SyncRes::getSaveParentsNSSetsSize(), 0U); SyncRes::clearECSStats(); diff --git a/pdns/syncres.cc b/pdns/syncres.cc index 9db36d3c45..a5bf23b1ce 100644 --- a/pdns/syncres.cc +++ b/pdns/syncres.cc @@ -51,6 +51,7 @@ SyncRes::LogMode SyncRes::s_lm; const std::unordered_set SyncRes::s_redirectionQTypes = {QType::CNAME, QType::DNAME}; LockGuarded> SyncRes::s_fails; LockGuarded> SyncRes::s_nonresolving; +LockGuarded SyncRes::s_savedParentNSSet; unsigned int SyncRes::s_maxnegttl; unsigned int SyncRes::s_maxbogusttl; @@ -735,6 +736,37 @@ uint64_t SyncRes::doDumpNonResolvingNS(int fd) return count; } +uint64_t SyncRes::doDumpSavedParentNSSets(int fd) +{ + int newfd = dup(fd); + if (newfd == -1) { + return 0; + } + auto fp = std::unique_ptr(fdopen(newfd, "w"), fclose); + if (!fp) { + close(newfd); + return 0; + } + fprintf(fp.get(), "; dump of saved parent nameserver sets succesfully used follows\n"); + fprintf(fp.get(), "; total entries: %zu\n", s_savedParentNSSet.lock()->size()); + fprintf(fp.get(), "; domain\tsuccess\tttd\n"); + uint64_t count=0; + + // We get a copy, so the I/O does not need to happen while holding the lock + for (const auto& i : s_savedParentNSSet.lock()->getMapCopy()) + { + if (i.d_count == 0) { + continue; + } + count++; + char tmp[26]; + ctime_r(&i.d_ttd, tmp); + fprintf(fp.get(), "%s\t%llu\t%s", i.d_domain.toString().c_str(), static_cast(i.d_count), tmp); + } + + return count; +} + /* so here is the story. First we complete the full resolution process for a domain name. And only THEN do we decide to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records so that if there are RRSIGs for a name, we'll have them. @@ -1244,8 +1276,33 @@ int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType qtyp subdomain=getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, beenthere); // pass beenthere to both occasions } - res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation); + res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation, nullptr); + if (res == -1) { + // It did not work out, lets check if we have a saved parent NS set + map> fallBack; + { + auto lock = s_savedParentNSSet.lock(); + auto domainData= lock->find(subdomain); + if (domainData != lock->end() && domainData->d_nsAddresses.size() > 0) { + nsset.clear(); + // Build the nsset arg and fallBack data for the fallback doResolveAt() attempt + // Take a copy to be able to release the lock, NsSet is actually a map, go figure + for (const auto& ns : domainData->d_nsAddresses) { + nsset.emplace(ns.first, pair(std::vector(), false)); + fallBack.emplace(ns.first, ns.second); + } + } + } + if (fallBack.size() > 0) { + LOG(prefix<inc(subdomain); + } + } + } /* Apply Post filtering policies */ if (d_wantsRPZ && !d_appliedPolicy.wasHit()) { auto luaLocal = g_luaconfs.getLocal(); @@ -3401,6 +3458,55 @@ void SyncRes::sanitizeRecords(const std::string& prefix, LWResult& lwr, const DN } } + +void SyncRes::rememberParentSetIfNeeded(const DNSName& domain, const vector& newRecords, unsigned int depth) +{ + vector existing; + bool wasAuth = false; + auto ttl = g_recCache->get(d_now.tv_sec, domain, QType::NS, false, &existing, d_cacheRemote, false, d_routingTag, nullptr, nullptr, nullptr, nullptr, &wasAuth); + + if (ttl <= 0 || wasAuth) { + return; + } + { + auto lock = s_savedParentNSSet.lock(); + if (lock->find(domain) != lock->end()) { + // no relevant data, or we already stored the parent data + return; + } + } + + set authSet; + for (const auto& ns : newRecords) { + auto content = getRR(ns); + authSet.insert(content->getNS()); + } + // The glue IPs could also differ, but we're not checking that yet, we're only looking for child NS records not + // in the parent set + bool shouldSave = false; + for (const auto& ns : existing) { + auto content = getRR(ns); + if (authSet.count(content->getNS()) == 0) { + LOG(d_prefix << domain << ": at least one child NS was not in the parent NS set, remembering parent NS set and cached IPs" << endl); + shouldSave = true; + break; + } + } + + if (shouldSave) { + map> entries; + for (const auto& ns : existing) { + auto content = getRR(ns); + const DNSName& name = content->getNS(); + set beenthereIgnored; + unsigned int nretrieveAddressesForNSIgnored; + auto addresses = getAddrs(name, depth, beenthereIgnored, true, nretrieveAddressesForNSIgnored); + entries.emplace(name, addresses); + } + s_savedParentNSSet.lock()->emplace(domain, std::move(entries), d_now.tv_sec + ttl); + } +} + RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, const boost::optional ednsmask, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool rdQuery, const ComboAddress& remoteIP) { bool wasForwardRecurse = wasForwarded && rdQuery; @@ -3720,6 +3826,10 @@ RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, LWResult& lwr d_fromAuthIP = remoteIP; if (doCache) { + // Check if we are going to replace a non-auth (parent) NS recordset + if (isAA && i->first.type == QType::NS) { + rememberParentSetIfNeeded(i->first.name, i->second.records, depth); + } g_recCache->replace(d_now.tv_sec, i->first.name, i->first.type, i->second.records, i->second.signatures, authorityRecs, i->first.type == QType::DS ? true : isAA, auth, i->first.place == DNSResourceRecord::ANSWER ? ednsmask : boost::none, d_routingTag, recordState, remoteIP); if (g_aggressiveNSECCache && needWildcardProof && recordState == vState::Secure && i->first.place == DNSResourceRecord::ANSWER && i->first.name == qname && !i->second.signatures.empty() && !d_routingTag && !ednsmask) { @@ -4531,7 +4641,8 @@ bool SyncRes::doDoTtoAuth(const DNSName& ns) const */ int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, const DNSName &qname, const QType qtype, vector&ret, - unsigned int depth, set&beenthere, vState& state, StopAtDelegation* stopAtDelegation) + unsigned int depth, set&beenthere, vState& state, StopAtDelegation* stopAtDelegation, + map>* fallBack) { auto luaconfsLocal = g_luaconfs.getLocal(); string prefix; @@ -4628,8 +4739,13 @@ int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, con } } else { - /* if tns is empty, retrieveAddressesForNS() knows we have hardcoded servers (i.e. "forwards") */ - remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly, addressQueriesForNS); + if (fallBack == nullptr) { + /* if tns is empty, retrieveAddressesForNS() knows we have hardcoded servers (i.e. "forwards") */ + remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly, addressQueriesForNS); + } else { + // should be save, caller makes sure nameservers and fallback contain the same names + remoteIPs = fallBack->at(tns->first); + } if(remoteIPs.empty()) { LOG(prefix<first<<", trying next if available"<>&& nsAddresses, time_t ttd) + : d_domain(name), d_nsAddresses(nsAddresses), d_ttd(ttd) + { + } + DNSName d_domain; + map> d_nsAddresses; + time_t d_ttd; + mutable uint64_t d_count{0}; + }; + + typedef multi_index_container< + SavedParentEntry, + indexed_by, member>, + ordered_non_unique, member> + >> SavedParentNSSetBase; + + class SavedParentNSSet : public SavedParentNSSetBase + { + public: + void prune(time_t now) + { + auto &ind = get(); + ind.erase(ind.begin(), ind.upper_bound(now)); + } + void inc(const DNSName& name) + { + auto it = find(name); + if (it != end()) { + ++(*it).d_count; + } + } + SavedParentNSSet getMapCopy() const + { + return *this; + } + }; + static LockGuarded> s_fails; static LockGuarded> s_nonresolving; + static LockGuarded s_savedParentNSSet; struct ThreadLocalStorage { nsspeeds_t nsSpeeds; @@ -426,6 +466,8 @@ public: static uint64_t doDumpThrottleMap(int fd); static uint64_t doDumpFailedServers(int fd); static uint64_t doDumpNonResolvingNS(int fd); + static uint64_t doDumpSavedParentNSSets(int fd); + static int getRootNS(struct timeval now, asyncresolve_t asyncCallback, unsigned int depth); static void addDontQuery(const std::string& mask) { @@ -570,6 +612,18 @@ public: { s_nonresolving.lock()->prune(cutoff); } + static void clearSaveParentsNSSets() + { + s_savedParentNSSet.lock()->clear(); + } + static size_t getSaveParentsNSSetsSize() + { + return s_savedParentNSSet.lock()->size(); + } + static void pruneSaveParentsNSSets(time_t now) + { + s_savedParentNSSet.lock()->prune(now); + } static void setDomainMap(std::shared_ptr newMap) { t_sstorage.domainmap = newMap; @@ -857,7 +911,8 @@ private: bool doDoTtoAuth(const DNSName& ns) const; int doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, const DNSName &qname, QType qtype, vector&ret, - unsigned int depth, set&beenthere, vState& state, StopAtDelegation* stopAtDelegation); + unsigned int depth, set&beenthere, vState& state, StopAtDelegation* stopAtDelegation, + std::map>* fallback); bool doResolveAtThisIP(const std::string& prefix, const DNSName& qname, const QType qtype, LWResult& lwr, boost::optional& ednsmask, const DNSName& auth, bool const sendRDQuery, const bool wasForwarded, const DNSName& nsName, const ComboAddress& remoteIP, bool doTCP, bool doDoT, bool& truncated, bool& spoofed); bool processAnswer(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType qtype, DNSName& auth, bool wasForwarded, const boost::optional ednsmask, bool sendRDQuery, NsSet &nameservers, std::vector& ret, const DNSFilterEngine& dfe, bool* gotNewServers, int* rcode, vState& state, const ComboAddress& remoteIP); @@ -888,6 +943,7 @@ private: /* This function will check whether the answer should have the AA bit set, and will set if it should be set and isn't. This is unfortunately needed to deal with very crappy so-called DNS servers */ void fixupAnswer(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery); + void rememberParentSetIfNeeded(const DNSName& domain, const vector& newRecords, unsigned int depth); RCode::rcodes_ updateCacheFromRecords(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, const boost::optional, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool sendRDQuery, const ComboAddress& remoteIP); bool processRecords(const std::string& prefix, const DNSName& qname, const QType qtype, const DNSName& auth, LWResult& lwr, const bool sendRDQuery, vector& ret, set& nsset, DNSName& newtarget, DNSName& newauth, bool& realreferral, bool& negindic, vState& state, const bool needWildcardProof, const bool gatherwildcardProof, const unsigned int wildcardLabelsCount, int& rcode, bool& negIndicHasSignatures, unsigned int depth);