]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
Remember parent NS set, to be able to fallback to it if needed
authorOtto <otto.moerbeek@open-xchange.com>
Tue, 19 Oct 2021 08:48:28 +0000 (10:48 +0200)
committerOtto Moerbeek <otto.moerbeek@open-xchange.com>
Mon, 21 Mar 2022 09:16:02 +0000 (10:16 +0100)
When a non-auth NS set is replaced by an auth one, remember the non-auth one if
it contains NS not in the auth set.

Use that NS set later as a fallback if resolving fails.

TODO: general validation of method, pruning the new table, tests, config switch?

Should fix #10594

pdns/rec_channel_rec.cc
pdns/rec_control.cc
pdns/recursordist/rec-main.cc
pdns/recursordist/test-syncres_cc.cc
pdns/syncres.cc
pdns/syncres.hh

index 8ed79f3b10a820ed4145d7e2e1e34e14c414f28f..4c124160e7b566f2b0f91bc8fb9c3fb828e46133 100644 (file)
@@ -377,6 +377,11 @@ static uint64_t* pleaseDumpFailedServers(int fd)
   return new uint64_t(SyncRes::doDumpFailedServers(fd));
 }
 
+// Dump helper handed to doDumpToFile() for the "dump-saved-parent-ns-sets" command: lets
+// SyncRes write the saved parent NS sets to fd and returns the resulting entry count in a
+// newly allocated uint64_t, like the sibling pleaseDump* helpers in this file.
+static uint64_t* pleaseDumpSavedParentNSSets(int fd)
+{
+  const uint64_t dumped = SyncRes::doDumpSavedParentNSSets(fd);
+  return new uint64_t(dumped);
+}
+
 static uint64_t* pleaseDumpNonResolvingNS(int fd)
 {
   return new uint64_t(SyncRes::doDumpNonResolvingNS(fd));
@@ -1904,6 +1909,8 @@ RecursorControlChannel::Answer RecursorControlParser::getAnswer(int s, const str
             "dump-failedservers <filename>    dump the failed servers to the named file\n"
             "dump-non-resolving <filename>    dump non-resolving nameservers addresses to the named file\n"
             "dump-nsspeeds <filename>         dump nsspeeds statistics to the named file\n"
+            "dump-saved-parent-ns-sets <filename>\n"
+            "                                 dump saved parent ns sets that were used successfully as fallback\n"
             "dump-rpz <zone name> <filename>  dump the content of a RPZ zone to the named file\n"
             "dump-throttlemap <filename>      dump the contents of the throttle map to the named file\n"
             "get [key1] [key2] ..             get specific statistics\n"
@@ -1980,6 +1987,9 @@ RecursorControlChannel::Answer RecursorControlParser::getAnswer(int s, const str
   if (cmd == "dump-failedservers") {
     return doDumpToFile(s, pleaseDumpFailedServers, cmd, false);
   }
+  if (cmd == "dump-saved-parent-ns-sets") {
+    return doDumpToFile(s, pleaseDumpSavedParentNSSets, cmd, false);
+  }
   if (cmd == "dump-rpz") {
     return doDumpRPZ(s, begin, end);
   }
index fdb68179026ebe89b6aecc10eba8893cfdc8eb84..4b8d32b74e5965b504919c5311e94514b974bfac 100644 (file)
@@ -95,7 +95,8 @@ int main(int argc, char** argv)
     "dump-failedservers",
     "dump-rpz",
     "dump-throttlemap",
-    "dump-non-resolving"};
+    "dump-non-resolving",
+    "dump-saved-parent-ns-sets"};
   try {
     initArguments(argc, argv);
     string sockname = "pdns_recursor";
index 3ade92299f868e62c92e580802567442ea44a267..0b1aa958896f429b32cfb85937aa71ab9866b965 100644 (file)
@@ -881,7 +881,10 @@ static void doStats(void)
           << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) << ", ns speeds: "
           << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize) << ", failed ns: "
           << SyncRes::getFailedServersSize() << ", ednsmap: "
-          << broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize) << endl;
+          << broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize) << ", non-resolving: "
+          << SyncRes::getNonResolvingNSSize() << ", saved-parentsets: "
+          << SyncRes::getSaveParentsNSSetsSize()
+          << endl;
     g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries) << "%";
     g_log << Logger::Notice << ", " << ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries) << "% throttled" << endl;
     g_log << Logger::Notice << "stats: " << SyncRes::s_tcpoutqueries << "/" << SyncRes::s_dotoutqueries << "/" << getCurrentIdleTCPConnections() << " outgoing tcp/dot/idle connections, " << broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries) << " queries running, " << SyncRes::s_outgoingtimeouts << " outgoing timeouts " << endl;
@@ -1924,6 +1927,11 @@ static void houseKeeping(void*)
         SyncRes::pruneNonResolving(now.tv_sec - SyncRes::s_nonresolvingnsthrottletime);
       });
 
+      static PeriodicTask pruneSaveParentSetTask{"pruneSaveParentSetTask", 60};
+      pruneSaveParentSetTask.runIfDue(now, [now]() {
+        SyncRes::pruneSaveParentsNSSets(now.tv_sec);
+      });
+
       // Divide by 12 to get the original 2 hour cycle if s_maxcachettl is default (1 day)
       static PeriodicTask rootUpdateTask{"rootUpdateTask", std::max(SyncRes::s_maxcachettl / 12, 10U)};
       rootUpdateTask.runIfDue(now, [now]() {
index 8f05f922e2382143c0bc817d57c916727c3f0cdf..5eeb125c96d2b43976d1791466661e61be837624 100644 (file)
@@ -192,6 +192,8 @@ void initSR(bool debug)
   BOOST_CHECK_EQUAL(SyncRes::getFailedServersSize(), 0U);
   SyncRes::clearNonResolvingNS();
   BOOST_CHECK_EQUAL(SyncRes::getNonResolvingNSSize(), 0U);
+  SyncRes::clearSaveParentsNSSets();
+  BOOST_CHECK_EQUAL(SyncRes::getSaveParentsNSSetsSize(), 0U);
 
   SyncRes::clearECSStats();
 
index 9db36d3c455bd155b4874cfb561073ff5f73fce0..a5bf23b1ce3958f8749b13f24c0a634fc7da8ae2 100644 (file)
@@ -51,6 +51,7 @@ SyncRes::LogMode SyncRes::s_lm;
 const std::unordered_set<QType> SyncRes::s_redirectionQTypes = {QType::CNAME, QType::DNAME};
 LockGuarded<fails_t<ComboAddress>> SyncRes::s_fails;
 LockGuarded<fails_t<DNSName>> SyncRes::s_nonresolving;
+LockGuarded <SyncRes::SavedParentNSSet> SyncRes::s_savedParentNSSet;
 
 unsigned int SyncRes::s_maxnegttl;
 unsigned int SyncRes::s_maxbogusttl;
@@ -735,6 +736,37 @@ uint64_t SyncRes::doDumpNonResolvingNS(int fd)
   return count;
 }
 
+uint64_t SyncRes::doDumpSavedParentNSSets(int fd)
+{
+  int newfd = dup(fd);
+  if (newfd == -1) {
+    return 0;
+  }
+  auto fp = std::unique_ptr<FILE, int(*)(FILE*)>(fdopen(newfd, "w"), fclose);
+  if (!fp) {
+    close(newfd);
+    return 0;
+  }
+  fprintf(fp.get(), "; dump of saved parent nameserver sets succesfully used follows\n");
+  fprintf(fp.get(), "; total entries: %zu\n", s_savedParentNSSet.lock()->size());
+  fprintf(fp.get(), "; domain\tsuccess\tttd\n");
+  uint64_t count=0;
+
+  // We get a copy, so the I/O does not need to happen while holding the lock
+  for (const auto& i : s_savedParentNSSet.lock()->getMapCopy())
+  {
+    if (i.d_count == 0) {
+      continue;
+    }
+    count++;
+    char tmp[26];
+    ctime_r(&i.d_ttd, tmp);
+    fprintf(fp.get(), "%s\t%llu\t%s", i.d_domain.toString().c_str(), static_cast<unsigned long long>(i.d_count), tmp);
+  }
+
+  return count;
+}
+
 /* so here is the story. First we complete the full resolution process for a domain name. And only THEN do we decide
    to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records
    so that if there are RRSIGs for a name, we'll have them.
@@ -1244,8 +1276,33 @@ int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType qtyp
     subdomain=getBestNSNamesFromCache(subdomain, qtype, nsset, &flawedNSSet, depth, beenthere); //  pass beenthere to both occasions
   }
 
-  res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation);
+  res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation, nullptr);
 
+  if (res == -1) {
+    // It did not work out, let's check if we have a saved parent NS set
+    map<DNSName, vector<ComboAddress>> fallBack;
+    {
+      auto lock = s_savedParentNSSet.lock();
+      auto domainData= lock->find(subdomain);
+      if (domainData != lock->end() && domainData->d_nsAddresses.size() > 0) {
+        nsset.clear();
+        // Build the nsset arg and fallBack data for the fallback doResolveAt() attempt
+        // Take a copy to be able to release the lock, NsSet is actually a map, go figure
+        for (const auto& ns : domainData->d_nsAddresses) {
+          nsset.emplace(ns.first, pair(std::vector<ComboAddress>(), false));
+          fallBack.emplace(ns.first, ns.second);
+        }
+      }
+    }
+    if (fallBack.size() > 0) {
+      LOG(prefix<<qname<<": Failure, but we have a saved parent NS set, trying that one"<< endl)
+      res = doResolveAt(nsset, subdomain, flawedNSSet, qname, qtype, ret, depth, beenthere, state, stopAtDelegation, &fallBack);
+      if (res == 0) {
+        // It did work out
+        s_savedParentNSSet.lock()->inc(subdomain);
+      }
+    }
+  }
   /* Apply Post filtering policies */
   if (d_wantsRPZ && !d_appliedPolicy.wasHit()) {
     auto luaLocal = g_luaconfs.getLocal();
@@ -3401,6 +3458,55 @@ void SyncRes::sanitizeRecords(const std::string& prefix, LWResult& lwr, const DN
   }
 }
 
+
+void SyncRes::rememberParentSetIfNeeded(const DNSName& domain, const vector<DNSRecord>& newRecords, unsigned int depth)
+{
+  vector<DNSRecord> existing;
+  bool wasAuth = false;
+  auto ttl = g_recCache->get(d_now.tv_sec, domain, QType::NS, false, &existing, d_cacheRemote, false, d_routingTag, nullptr, nullptr, nullptr, nullptr, &wasAuth);
+
+  if (ttl <= 0 || wasAuth) {
+    return;
+  }
+  {
+    auto lock = s_savedParentNSSet.lock();
+    if (lock->find(domain) != lock->end()) {
+      // no relevant data, or we already stored the parent data
+      return;
+    }
+  }
+
+  set<DNSName> authSet;
+  for (const auto& ns : newRecords) {
+    auto content = getRR<NSRecordContent>(ns);
+    authSet.insert(content->getNS());
+  }
+  // The glue IPs could also differ, but we're not checking that yet, we're only looking for child NS records not
+  // in the parent set
+  bool shouldSave = false;
+  for (const auto& ns : existing) {
+    auto content = getRR<NSRecordContent>(ns);
+    if (authSet.count(content->getNS()) == 0) {
+      LOG(d_prefix << domain << ": at least one child NS was not in the parent NS set, remembering parent NS set and cached IPs" << endl);
+      shouldSave = true;
+      break;
+    }
+  }
+
+  if (shouldSave) {
+    map<DNSName, vector<ComboAddress>> entries;
+    for (const auto& ns : existing) {
+      auto content = getRR<NSRecordContent>(ns);
+      const DNSName& name = content->getNS();
+      set<GetBestNSAnswer> beenthereIgnored;
+      unsigned int nretrieveAddressesForNSIgnored;
+      auto addresses = getAddrs(name, depth, beenthereIgnored, true, nretrieveAddressesForNSIgnored);
+      entries.emplace(name, addresses);
+    }
+    s_savedParentNSSet.lock()->emplace(domain, std::move(entries), d_now.tv_sec + ttl);
+  }
+}
+
 RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool rdQuery, const ComboAddress& remoteIP)
 {
   bool wasForwardRecurse = wasForwarded && rdQuery;
@@ -3720,6 +3826,10 @@ RCode::rcodes_ SyncRes::updateCacheFromRecords(unsigned int depth, LWResult& lwr
       d_fromAuthIP = remoteIP;
 
       if (doCache) {
+        // Check if we are going to replace a non-auth (parent) NS recordset
+        if (isAA && i->first.type == QType::NS) {
+          rememberParentSetIfNeeded(i->first.name, i->second.records, depth);
+        }
         g_recCache->replace(d_now.tv_sec, i->first.name, i->first.type, i->second.records, i->second.signatures, authorityRecs, i->first.type == QType::DS ? true : isAA, auth, i->first.place == DNSResourceRecord::ANSWER ? ednsmask : boost::none, d_routingTag, recordState, remoteIP);
 
         if (g_aggressiveNSECCache && needWildcardProof && recordState == vState::Secure && i->first.place == DNSResourceRecord::ANSWER && i->first.name == qname && !i->second.signatures.empty() && !d_routingTag && !ednsmask) {
@@ -4531,7 +4641,8 @@ bool SyncRes::doDoTtoAuth(const DNSName& ns) const
  */
 int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, const DNSName &qname, const QType qtype,
                          vector<DNSRecord>&ret,
-                         unsigned int depth, set<GetBestNSAnswer>&beenthere, vState& state, StopAtDelegation* stopAtDelegation)
+                         unsigned int depth, set<GetBestNSAnswer>&beenthere, vState& state, StopAtDelegation* stopAtDelegation,
+                         map<DNSName, vector<ComboAddress>>* fallBack)
 {
   auto luaconfsLocal = g_luaconfs.getLocal();
   string prefix;
@@ -4628,8 +4739,13 @@ int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, con
         }
       }
       else {
-        /* if tns is empty, retrieveAddressesForNS() knows we have hardcoded servers (i.e. "forwards") */
-        remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly, addressQueriesForNS);
+        if (fallBack == nullptr) {
+          /* if tns is empty, retrieveAddressesForNS() knows we have hardcoded servers (i.e. "forwards") */
+          remoteIPs = retrieveAddressesForNS(prefix, qname, tns, depth, beenthere, rnameservers, nameservers, sendRDQuery, pierceDontQuery, flawedNSSet, cacheOnly, addressQueriesForNS);
+        } else {
+          // should be safe, caller makes sure nameservers and fallback contain the same names
+          remoteIPs = fallBack->at(tns->first);
+        }
 
         if(remoteIPs.empty()) {
           LOG(prefix<<qname<<": Failed to get IP for NS "<<tns->first<<", trying next if available"<<endl);
index db4dc45ad7b2c438d20dbd0d21ee512f3f7f57ef..989d1d6d5e63349a539ca009a51db051b0b43336 100644 (file)
@@ -407,8 +407,48 @@ public:
 
   };
 
+  /* One saved parent-side NS set: the domain it belongs to, the name server names with
+     their addresses, the moment the entry expires and a counter of successful uses as
+     fallback. */
+  struct SavedParentEntry
+  {
+    // nsAddresses is taken by rvalue reference; inside the constructor it is a named
+    // lvalue, so it has to be moved explicitly or the whole map would be copied.
+    SavedParentEntry(const DNSName& name, map<DNSName, vector<ComboAddress>>&& nsAddresses, time_t ttd)
+      : d_domain(name), d_nsAddresses(std::move(nsAddresses)), d_ttd(ttd)
+    {
+    }
+    DNSName d_domain; // key of the unique index of the container
+    map<DNSName, vector<ComboAddress>> d_nsAddresses; // name server name -> its addresses
+    time_t d_ttd; // time to die, key of the index used for pruning
+    // mutable: incremented through the container's iterators, see SavedParentNSSet::inc();
+    // it is not part of any index key
+    mutable uint64_t d_count{0};
+  };
+
+  // Storage for SavedParentEntry objects with two ordered indexes: a unique one on the
+  // domain (tag DNSName) and a non-unique one on the time-to-die (tag time_t).
+  using SavedParentNSSetBase = multi_index_container<
+    SavedParentEntry,
+    indexed_by<
+      ordered_unique<tag<DNSName>, member<SavedParentEntry, DNSName, &SavedParentEntry::d_domain>>,
+      ordered_non_unique<tag<time_t>, member<SavedParentEntry, time_t, &SavedParentEntry::d_ttd>>>>;
+
+  class SavedParentNSSet : public SavedParentNSSetBase
+  {
+  public:
+    void prune(time_t now)
+    {
+      auto &ind = get<time_t>();
+      ind.erase(ind.begin(), ind.upper_bound(now));
+    }
+    void inc(const DNSName& name)
+    {
+      auto it = find(name);
+      if (it != end()) {
+        ++(*it).d_count;
+      }
+    }
+    SavedParentNSSet getMapCopy() const
+    {
+      return *this;
+    }
+  };
+
   static LockGuarded<fails_t<ComboAddress>> s_fails;
   static LockGuarded<fails_t<DNSName>> s_nonresolving;
+  static LockGuarded <SavedParentNSSet> s_savedParentNSSet;
 
   struct ThreadLocalStorage {
     nsspeeds_t nsSpeeds;
@@ -426,6 +466,8 @@ public:
   static uint64_t doDumpThrottleMap(int fd);
   static uint64_t doDumpFailedServers(int fd);
   static uint64_t doDumpNonResolvingNS(int fd);
+  static uint64_t doDumpSavedParentNSSets(int fd);
+
   static int getRootNS(struct timeval now, asyncresolve_t asyncCallback, unsigned int depth);
   static void addDontQuery(const std::string& mask)
   {
@@ -570,6 +612,18 @@ public:
   {
     s_nonresolving.lock()->prune(cutoff);
   }
+  // Remove all saved parent NS sets, with the s_savedParentNSSet lock taken.
+  static void clearSaveParentsNSSets()
+  {
+    auto guard = s_savedParentNSSet.lock();
+    guard->clear();
+  }
+  // Number of saved parent NS sets, read with the s_savedParentNSSet lock taken.
+  static size_t getSaveParentsNSSetsSize()
+  {
+    auto guard = s_savedParentNSSet.lock();
+    return guard->size();
+  }
+  // Forward to SavedParentNSSet::prune(now), with the s_savedParentNSSet lock taken.
+  static void pruneSaveParentsNSSets(time_t now)
+  {
+    auto guard = s_savedParentNSSet.lock();
+    guard->prune(now);
+  }
   static void setDomainMap(std::shared_ptr<domainmap_t> newMap)
   {
     t_sstorage.domainmap = newMap;
@@ -857,7 +911,8 @@ private:
 
   bool doDoTtoAuth(const DNSName& ns) const;
   int doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, const DNSName &qname, QType qtype, vector<DNSRecord>&ret,
-                  unsigned int depth, set<GetBestNSAnswer>&beenthere, vState& state, StopAtDelegation* stopAtDelegation);
+                  unsigned int depth, set<GetBestNSAnswer>&beenthere, vState& state, StopAtDelegation* stopAtDelegation,
+                  std::map<DNSName, std::vector<ComboAddress>>* fallback);
   bool doResolveAtThisIP(const std::string& prefix, const DNSName& qname, const QType qtype, LWResult& lwr, boost::optional<Netmask>& ednsmask, const DNSName& auth, bool const sendRDQuery, const bool wasForwarded, const DNSName& nsName, const ComboAddress& remoteIP, bool doTCP, bool doDoT, bool& truncated, bool& spoofed);
   bool processAnswer(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType qtype, DNSName& auth, bool wasForwarded, const boost::optional<Netmask> ednsmask, bool sendRDQuery, NsSet &nameservers, std::vector<DNSRecord>& ret, const DNSFilterEngine& dfe, bool* gotNewServers, int* rcode, vState& state, const ComboAddress& remoteIP);
 
@@ -888,6 +943,7 @@ private:
 /* This function will check whether the answer should have the AA bit set, and will set if it should be set and isn't.
    This is unfortunately needed to deal with very crappy so-called DNS servers */
   void fixupAnswer(const std::string& prefix, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, bool rdQuery);
+  void rememberParentSetIfNeeded(const DNSName& domain, const vector<DNSRecord>& newRecords, unsigned int depth);
   RCode::rcodes_ updateCacheFromRecords(unsigned int depth, LWResult& lwr, const DNSName& qname, const QType qtype, const DNSName& auth, bool wasForwarded, const boost::optional<Netmask>, vState& state, bool& needWildcardProof, bool& gatherWildcardProof, unsigned int& wildcardLabelsCount, bool sendRDQuery, const ComboAddress& remoteIP);
   bool processRecords(const std::string& prefix, const DNSName& qname, const QType qtype, const DNSName& auth, LWResult& lwr, const bool sendRDQuery, vector<DNSRecord>& ret, set<DNSName>& nsset, DNSName& newtarget, DNSName& newauth, bool& realreferral, bool& negindic, vState& state, const bool needWildcardProof, const bool gatherwildcardProof, const unsigned int wildcardLabelsCount, int& rcode, bool& negIndicHasSignatures, unsigned int depth);