From: Otto Moerbeek Date: Mon, 31 Jan 2022 11:36:07 +0000 (+0100) Subject: Fix stats for failed servers (named hosts for some historic reason) and add non-resolv... X-Git-Tag: auth-4.7.0-alpha1~23^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=189fe4788d8fb3aa247c7f13f6a9118f1b09d6dd;p=thirdparty%2Fpdns.git Fix stats for failed servers (named hosts for some historic reason) and add non-resolving metrics. Also, use a safe copy to dump the maps. --- diff --git a/pdns/rec-snmp.cc b/pdns/rec-snmp.cc index d6867d86a4..f1d33799f2 100644 --- a/pdns/rec-snmp.cc +++ b/pdns/rec-snmp.cc @@ -145,6 +145,7 @@ static const oid udp6InCsumErrorsOID[] = {RECURSOR_STATS_OID, 123}; #endif /* __linux__ */ static const oid sourceDisallowedNotifyOID[] = {RECURSOR_STATS_OID, 124}; static const oid zoneDisallowedNotifyOID[] = {RECURSOR_STATS_OID, 125}; +static const oid nonResolvingNSEntriesOID[] = {RECURSOR_STATS_OID, 126}; static std::unordered_map s_statsMap; @@ -379,5 +380,6 @@ RecursorSNMPAgent::RecursorSNMPAgent(const std::string& name, const std::string& registerCounter64Stat("almost-expired-pushed", almostExpiredPushed, OID_LENGTH(almostExpiredPushed)); registerCounter64Stat("almost-expired-run", almostExpiredRun, OID_LENGTH(almostExpiredRun)); registerCounter64Stat("almost-expired-exceptions", almostExpiredExceptions, OID_LENGTH(almostExpiredExceptions)); + registerCounter64Stat("non-resolving-ns-entries", nonResolvingNSEntriesOID, OID_LENGTH(nonResolvingNSEntriesOID)); #endif /* HAVE_NET_SNMP */ } diff --git a/pdns/rec_channel_rec.cc b/pdns/rec_channel_rec.cc index f7c3f4dfbd..3a56a59f7d 100644 --- a/pdns/rec_channel_rec.cc +++ b/pdns/rec_channel_rec.cc @@ -399,7 +399,8 @@ static RecursorControlChannel::Answer doDumpToFile(int s, uint64_t* (*function)( if (threads) { int fd = fdw; total = broadcastAccFunction([function, fd] { return function(fd); }); - } else { + } + else { auto ret = function(fdw); total = *ret; delete ret; @@ -970,17 +971,6 @@ static
uint64_t getNegCacheSize() return g_negCache->size(); } -static uint64_t* pleaseGetFailedHostsSize() -{ - uint64_t tmp = (SyncRes::getThrottledServersSize()); - return new uint64_t(tmp); -} - -static uint64_t getFailedHostsSize() -{ - return broadcastAccFunction(pleaseGetFailedHostsSize); -} - uint64_t* pleaseGetNsSpeedsSize() { return new uint64_t(SyncRes::getNSSpeedsSize()); @@ -991,11 +981,6 @@ static uint64_t getNsSpeedsSize() return broadcastAccFunction(pleaseGetNsSpeedsSize); } -uint64_t* pleaseGetFailedServersSize() -{ - return new uint64_t(SyncRes::getFailedServersSize()); -} - uint64_t* pleaseGetEDNSStatusesSize() { return new uint64_t(SyncRes::getEDNSStatusesSize()); @@ -1256,7 +1241,8 @@ static void registerAllStats1() addGetStat("throttle-entries", getThrottleSize); addGetStat("nsspeeds-entries", getNsSpeedsSize); - addGetStat("failed-host-entries", getFailedHostsSize); + addGetStat("failed-host-entries", SyncRes::getFailedServersSize); + addGetStat("non-resolving-ns-entries", SyncRes::getNonResolvingNSSize); addGetStat("concurrent-queries", getConcurrentQueries); addGetStat("security-status", &g_security_status); diff --git a/pdns/recursordist/RECURSOR-MIB.txt b/pdns/recursordist/RECURSOR-MIB.txt index 22773d2549..2992e94710 100644 --- a/pdns/recursordist/RECURSOR-MIB.txt +++ b/pdns/recursordist/RECURSOR-MIB.txt @@ -45,6 +45,9 @@ rec MODULE-IDENTITY REVISION "202111090000Z" DESCRIPTION "Added NOTIFY-related metrics." + REVISION "202201310000Z" + DESCRIPTION "Added non-resolving NS name metric." 
+ ::= { powerdns 2 } powerdns OBJECT IDENTIFIER ::= { enterprises 43315 } @@ -464,7 +467,7 @@ failedHostEntries OBJECT-TYPE MAX-ACCESS read-only STATUS current DESCRIPTION - "Number of failed host entries" + "Number of entries in the failed NS cache" ::= { stats 52 } concurrentQueries OBJECT-TYPE @@ -1050,6 +1053,15 @@ zoneDisallowedNotify OBJECT-TYPE DESCRIPTION "Number of NOTIFY operations not allowed by allow-notify-for" ::= { stats 125 } + +nonResolvingNSEntries OBJECT-TYPE + SYNTAX CounterBasedGauge64 + MAX-ACCESS read-only + STATUS current + DESCRIPTION + "Number of entries in the non-resolving NS name cache" + ::= { stats 126 } + --- --- Traps / Notifications --- diff --git a/pdns/recursordist/docs/metrics.rst b/pdns/recursordist/docs/metrics.rst index 5223f78c86..c59fba5923 100644 --- a/pdns/recursordist/docs/metrics.rst +++ b/pdns/recursordist/docs/metrics.rst @@ -489,7 +489,7 @@ number of servers that sent an invalid EDNS PING response failed-host-entries ^^^^^^^^^^^^^^^^^^^ -number of servers that failed to resolve +number of addresses in the failed NS cache. .. 
_stat-fd-usage: @@ -546,6 +546,10 @@ noerror-answers ^^^^^^^^^^^^^^^ counts the number of times it answered NOERROR since starting +non-resolving-ns-entries +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +number of entries in the non-resolving NS name cache + noping-outqueries ^^^^^^^^^^^^^^^^^ number of queries sent out without ENDS PING diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc index c9505adb64..202223fee4 100644 --- a/pdns/recursordist/rec-main.cc +++ b/pdns/recursordist/rec-main.cc @@ -771,7 +771,7 @@ static void doStats(void) g_log << Logger::Notice << "stats: throttle map: " << broadcastAccFunction(pleaseGetThrottleSize) << ", ns speeds: " << broadcastAccFunction(pleaseGetNsSpeedsSize) << ", failed ns: " - << broadcastAccFunction(pleaseGetFailedServersSize) << ", ednsmap: " + << SyncRes::getFailedServersSize() << ", ednsmap: " << broadcastAccFunction(pleaseGetEDNSStatusesSize) << endl; g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries) << "%"; g_log << Logger::Notice << ", " << ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries) << "% throttled" << endl; diff --git a/pdns/syncres.cc b/pdns/syncres.cc index c36f653463..62d62a2c06 100644 --- a/pdns/syncres.cc +++ b/pdns/syncres.cc @@ -537,7 +537,8 @@ uint64_t SyncRes::doDumpFailedServers(int fd) fprintf(fp.get(), "; remote IP\tcount\ttimestamp\n"); uint64_t count=0; - for(const auto& i : s_fails.lock()->getMap()) + // We get a copy, so the I/O does not need to happen while holding the lock + for (const auto& i : s_fails.lock()->getMapCopy()) { count++; char tmp[26]; @@ -563,7 +564,8 @@ uint64_t SyncRes::doDumpNonResolvingNS(int fd) fprintf(fp.get(), "; name\tcount\ttimestamp\n"); uint64_t count=0; - for(const auto& i : s_nonresolving.lock()->getMap()) + // We get a copy, so the I/O does not need to happen while holding the lock + for (const auto& i : 
s_nonresolving.lock()->getMapCopy()) { count++; char tmp[26]; diff --git a/pdns/syncres.hh b/pdns/syncres.hh index 7f081d904b..9ffcebfd1e 100644 --- a/pdns/syncres.hh +++ b/pdns/syncres.hh @@ -210,9 +210,10 @@ public: ordered_non_unique, member> >> cont_t; - const cont_t& getMap() const { + cont_t getMapCopy() const { return d_cont; } + counter_t value(const T& t) const { auto i = d_cont.find(t); diff --git a/pdns/ws-recursor.cc b/pdns/ws-recursor.cc index de4b6e5fec..3c31fc90af 100644 --- a/pdns/ws-recursor.cc +++ b/pdns/ws-recursor.cc @@ -690,8 +690,11 @@ const std::map MetricDefinitionStorage::d_metrics MetricDefinition(PrometheusMetricType::counter, "Number of servers that sent an invalid EDNS PING response")}, {"failed-host-entries", - MetricDefinition(PrometheusMetricType::counter, - "Number of servers that failed to resolve")}, + MetricDefinition(PrometheusMetricType::gauge, + "Number of entries in the failed NS cache")}, + {"non-resolving-ns-entries", + MetricDefinition(PrometheusMetricType::gauge, + "Number of entries in the non-resolving NS name cache")}, {"ignored-packets", MetricDefinition(PrometheusMetricType::counter, "Number of non-query packets received on server sockets that should only get query packets")},