From: Otto Moerbeek Date: Tue, 12 Nov 2019 12:31:28 +0000 (+0100) Subject: Purge map of failed auths periodically by keeping a last changed timestamp. X-Git-Tag: dnsdist-1.4.0^2~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=60e5208a9a20d4cd2153d33b8cd500e9b241373c;p=thirdparty%2Fpdns.git Purge map of failed auths periodically by keeping a last changed timestamp. SyncRes thread-local storage includes a map of failed auths which was only cleaned if a specific IP was contacted again and that contact succeeded. Persistently failing auths or auths that are never tried again remained in the map. While here, add code to dump the failed servers map. Might (partially?) solve #7771. --- diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc index d36625e4d9..0562ccb919 100644 --- a/pdns/pdns_recursor.cc +++ b/pdns/pdns_recursor.cc @@ -2844,7 +2844,8 @@ static void doStats(void) g_log<(pleaseGetThrottleSize) <<", ns speeds: " - << broadcastAccFunction(pleaseGetNsSpeedsSize)<(pleaseGetNsSpeedsSize)<<", failed ns: " + << broadcastAccFunction(pleaseGetFailedServersSize)< static string doDumpNSSpeeds(T begin, T end) { @@ -367,6 +372,28 @@ static string doDumpThrottleMap(T begin, T end) return "dumped "+std::to_string(total)+" records\n"; } +template +static string doDumpFailedServers(T begin, T end) +{ + T i=begin; + string fname; + + if(i!=end) + fname=*i; + + int fd=open(fname.c_str(), O_CREAT | O_EXCL | O_WRONLY, 0660); + if(fd < 0) + return "Error opening dump file for writing: "+stringerror()+"\n"; + uint64_t total = 0; + try { + total = broadcastAccFunction(boost::bind(pleaseDumpFailedServers, fd)); + } + catch(...){} + + close(fd); + return "dumped "+std::to_string(total)+" records\n"; +} + uint64_t* pleaseWipeCache(const DNSName& canon, bool subtree) { return new uint64_t(t_RC->doWipeCache(canon, subtree)); @@ -885,6 +912,11 @@ static uint64_t getNsSpeedsSize() return broadcastAccFunction(pleaseGetNsSpeedsSize); } +uint64_t* 
pleaseGetFailedServersSize() +{ + return new uint64_t(SyncRes::getFailedServersSize()); +} + uint64_t* pleaseGetConcurrentQueries() { return new uint64_t(getMT() ? getMT()->numProcesses() : 0); @@ -1612,7 +1644,8 @@ string RecursorControlParser::getAnswer(const string& question, RecursorControlP "dump-edns [status] dump EDNS status to the named file\n" "dump-nsspeeds dump nsspeeds statistics to the named file\n" "dump-rpz dump the content of a RPZ zone to the named file\n" -"dump-throttlemap dump the contents of the throttle to the named file\n" +"dump-throttlemap dump the contents of the throttle map to the named file\n" +"dump-failedservers dump the failed servers to the named file\n" "get [key1] [key2] .. get specific statistics\n" "get-all get all statistics\n" "get-dont-throttle-names get the list of names that are not allowed to be throttled\n" @@ -1684,6 +1717,9 @@ string RecursorControlParser::getAnswer(const string& question, RecursorControlP if(cmd=="dump-nsspeeds") return doDumpNSSpeeds(begin, end); + if(cmd=="dump-failedservers") + return doDumpFailedServers(begin, end); + if(cmd=="dump-rpz") { return doDumpRPZ(begin, end); } diff --git a/pdns/syncres.cc b/pdns/syncres.cc index 8bd1a6861d..2e3b03d1af 100644 --- a/pdns/syncres.cc +++ b/pdns/syncres.cc @@ -472,6 +472,27 @@ uint64_t SyncRes::doDumpThrottleMap(int fd) return count; } +uint64_t SyncRes::doDumpFailedServers(int fd) +{ + auto fp = std::unique_ptr(fdopen(dup(fd), "w"), fclose); + if(!fp) + return 0; + fprintf(fp.get(), "; failed servers dump follows\n"); + fprintf(fp.get(), "; remote IP\tcount\ttimestamp\n"); + uint64_t count=0; + + for(const auto& i : t_sstorage.fails.getMap()) + { + count++; + char tmp[26]; + ctime_r(&i.second.last, tmp); + fprintf(fp.get(), "%s\t%lld\t%s", i.first.toString().c_str(), + static_cast(i.second.value), tmp); + } + + return count; +} + /* so here is the story. First we complete the full resolution process for a domain name. 
And only THEN do we decide to also do DNSSEC validation, which leads to new queries. To make this simple, we *always* ask for DNSSEC records so that if there are RRSIGs for a name, we'll have them. @@ -3150,7 +3171,7 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, t_sstorage.nsSpeeds[nsName.empty()? DNSName(remoteIP.toStringWithPort()) : nsName].submit(remoteIP, 1000000, &d_now); // 1 sec // code below makes sure we don't filter COM or the root - if (s_serverdownmaxfails > 0 && (auth != g_rootdnsname) && t_sstorage.fails.incr(remoteIP) >= s_serverdownmaxfails) { + if (s_serverdownmaxfails > 0 && (auth != g_rootdnsname) && t_sstorage.fails.incr(remoteIP, d_now) >= s_serverdownmaxfails) { LOG(prefix< class Counters : public boost::noncopyable { public: - Counters() - { + typedef unsigned long counter_t; + struct value_t { + counter_t value; + time_t last; + }; + typedef std::map cont_t; + + cont_t getMap() const { + return d_cont; } - unsigned long value(const Thing& t) const + counter_t value(const Thing& t) const { - typename cont_t::const_iterator i=d_cont.find(t); + typename cont_t::const_iterator i = d_cont.find(t); - if(i==d_cont.end()) { + if (i == d_cont.end()) { return 0; } - return (unsigned long)i->second; + return i->second.value; } - unsigned long incr(const Thing& t) + + counter_t incr(const Thing& t, const struct timeval & now) { - typename cont_t::iterator i=d_cont.find(t); + typename cont_t::iterator i = d_cont.find(t); - if(i==d_cont.end()) { - d_cont[t]=1; + if (i == d_cont.end()) { + d_cont[t].value = 1; + d_cont[t].last = now.tv_sec; return 1; } else { - if (i->second < std::numeric_limits::max()) - i->second++; - return (unsigned long)i->second; - } + if (i->second.value < std::numeric_limits::max()) { + i->second.value++; + } + i->second.last = now.tv_sec; + return i->second.value; + } } - unsigned long decr(const Thing& t) - { - typename cont_t::iterator i=d_cont.find(t); - if(i!=d_cont.end() && --i->second 
== 0) { - d_cont.erase(i); - return 0; - } else - return (unsigned long)i->second; - } void clear(const Thing& t) { - typename cont_t::iterator i=d_cont.find(t); + typename cont_t::iterator i = d_cont.find(t); - if(i!=d_cont.end()) { + if (i != d_cont.end()) { d_cont.erase(i); } } + void clear() { d_cont.clear(); } + size_t size() const { return d_cont.size(); } + + void prune(time_t cutoff) { + for (auto it = d_cont.begin(); it != d_cont.end(); ) { + if (it->second.last <= cutoff) { + it = d_cont.erase(it); + } else { + ++it; + } + } + } + private: - typedef map cont_t; cont_t d_cont; }; @@ -406,6 +420,7 @@ public: static uint64_t doEDNSDump(int fd); static uint64_t doDumpNSSpeeds(int fd); static uint64_t doDumpThrottleMap(int fd); + static uint64_t doDumpFailedServers(int fd); static int getRootNS(struct timeval now, asyncresolve_t asyncCallback); static void clearDelegationOnly() { @@ -526,6 +541,10 @@ public: { t_sstorage.fails.clear(); } + static void pruneFailedServers(time_t cutoff) + { + t_sstorage.fails.prune(cutoff); + } static unsigned long getServerFailsCount(const ComboAddress& server) { return t_sstorage.fails.value(server); @@ -1063,6 +1082,7 @@ template T broadcastAccFunction(const boost::function& func); std::shared_ptr parseAuthAndForwards(); uint64_t* pleaseGetNsSpeedsSize(); +uint64_t* pleaseGetFailedServersSize(); uint64_t* pleaseGetCacheSize(); uint64_t* pleaseGetNegCacheSize(); uint64_t* pleaseGetCacheHits();