]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
Fix stats for failed servers (named hosts for some historic reason) and add non-resolv...
authorOtto Moerbeek <otto.moerbeek@open-xchange.com>
Mon, 31 Jan 2022 11:36:07 +0000 (12:36 +0100)
committerOtto Moerbeek <otto.moerbeek@open-xchange.com>
Tue, 1 Feb 2022 08:43:11 +0000 (09:43 +0100)
Also, use a safe copy to dump the maps.

pdns/rec-snmp.cc
pdns/rec_channel_rec.cc
pdns/recursordist/RECURSOR-MIB.txt
pdns/recursordist/docs/metrics.rst
pdns/recursordist/rec-main.cc
pdns/syncres.cc
pdns/syncres.hh
pdns/ws-recursor.cc

index d6867d86a47278e9cedaa38759ce877e83b54951..f1d33799f20ae2dfd075113592502b7bf1e96182 100644 (file)
@@ -145,6 +145,7 @@ static const oid udp6InCsumErrorsOID[] = {RECURSOR_STATS_OID, 123};
 #endif /* __linux__ */
 static const oid sourceDisallowedNotifyOID[] = {RECURSOR_STATS_OID, 124};
 static const oid zoneDisallowedNotifyOID[] = {RECURSOR_STATS_OID, 125};
+static const oid nonResolvingNSEntriesOID[] = {RECURSOR_STATS_OID, 126};
 
 static std::unordered_map<oid, std::string> s_statsMap;
 
@@ -379,5 +380,6 @@ RecursorSNMPAgent::RecursorSNMPAgent(const std::string& name, const std::string&
   registerCounter64Stat("almost-expired-pushed", almostExpiredPushed, OID_LENGTH(almostExpiredPushed));
   registerCounter64Stat("almost-expired-run", almostExpiredRun, OID_LENGTH(almostExpiredRun));
   registerCounter64Stat("almost-expired-exceptions", almostExpiredExceptions, OID_LENGTH(almostExpiredExceptions));
+  registerCounter64Stat("non-resolving-ns-entries", nonResolvingNSEntriesOID, OID_LENGTH(nonResolvingNSEntriesOID));
 #endif /* HAVE_NET_SNMP */
 }
index f7c3f4dfbd0b116bdc26e784fcc5d85d16053c29..3a56a59f7d46b88b6796f156956fcbf928821799 100644 (file)
@@ -399,7 +399,8 @@ static RecursorControlChannel::Answer doDumpToFile(int s, uint64_t* (*function)(
     if (threads) {
       int fd = fdw;
       total = broadcastAccFunction<uint64_t>([function, fd] { return function(fd); });
-    } else {
+    }
+    else {
       auto ret = function(fdw);
       total = *ret;
       delete ret;
@@ -970,17 +971,6 @@ static uint64_t getNegCacheSize()
   return g_negCache->size();
 }
 
-static uint64_t* pleaseGetFailedHostsSize()
-{
-  uint64_t tmp = (SyncRes::getThrottledServersSize());
-  return new uint64_t(tmp);
-}
-
-static uint64_t getFailedHostsSize()
-{
-  return broadcastAccFunction<uint64_t>(pleaseGetFailedHostsSize);
-}
-
 uint64_t* pleaseGetNsSpeedsSize()
 {
   return new uint64_t(SyncRes::getNSSpeedsSize());
@@ -991,11 +981,6 @@ static uint64_t getNsSpeedsSize()
   return broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize);
 }
 
-uint64_t* pleaseGetFailedServersSize()
-{
-  return new uint64_t(SyncRes::getFailedServersSize());
-}
-
 uint64_t* pleaseGetEDNSStatusesSize()
 {
   return new uint64_t(SyncRes::getEDNSStatusesSize());
@@ -1256,7 +1241,8 @@ static void registerAllStats1()
   addGetStat("throttle-entries", getThrottleSize);
 
   addGetStat("nsspeeds-entries", getNsSpeedsSize);
-  addGetStat("failed-host-entries", getFailedHostsSize);
+  addGetStat("failed-host-entries", SyncRes::getFailedServersSize);
+  addGetStat("non-resolving-ns-entries", SyncRes::getNonResolvingNSSize);
 
   addGetStat("concurrent-queries", getConcurrentQueries);
   addGetStat("security-status", &g_security_status);
index 22773d2549545163449915c479978c3d6865ed54..2992e94710fd3ea89889b8bc243225a1cdd8cb73 100644 (file)
@@ -45,6 +45,9 @@ rec MODULE-IDENTITY
     REVISION "202111090000Z"
     DESCRIPTION "Added NOTIFY-related metrics."
 
+    REVISION "202201310000Z"
+    DESCRIPTION "Added non-resolving NS name metric."
+
     ::= { powerdns 2 }
 
 powerdns               OBJECT IDENTIFIER ::= { enterprises 43315 }
@@ -464,7 +467,7 @@ failedHostEntries OBJECT-TYPE
     MAX-ACCESS read-only
     STATUS current
     DESCRIPTION
-        "Number of failed host entries"
+        "Number of entries in the failed NS cache"
     ::= { stats 52 }
 
 concurrentQueries OBJECT-TYPE
@@ -1050,6 +1053,15 @@ zoneDisallowedNotify OBJECT-TYPE
     DESCRIPTION
         "Number of NOTIFY operations not allowed by allow-notify-for"
     ::= { stats 125 }
+
+nonResolvingNSEntries OBJECT-TYPE
+    SYNTAX CounterBasedGauge64
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "Number of entries in the non-resolving NS name cache"
+    ::= { stats 126 }
+
 ---
 --- Traps / Notifications
 ---
index 5223f78c86590dfc58c830a7859c827aeb11a55e..c59fba5923ed53bc517f92aebbf0eb7f0739e19a 100644 (file)
@@ -489,7 +489,7 @@ number of servers that sent an invalid EDNS   PING response
 
 failed-host-entries
 ^^^^^^^^^^^^^^^^^^^
-number of servers that failed to resolve
+number of addresses in the failed NS cache.
 
 .. _stat-fd-usage:
 
@@ -546,6 +546,10 @@ noerror-answers
 ^^^^^^^^^^^^^^^
 counts the number of times it answered NOERROR   since starting
 
+non-resolving-ns-entries
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+number of entries in the non-resolving NS name cache
+
 noping-outqueries
 ^^^^^^^^^^^^^^^^^
 number of queries sent out without EDNS PING
index c9505adb6457396c5ebf704d1fb32bfbc6b1dc31..202223fee4a27a5cb50d93cff7d5a89e98bc3abf 100644 (file)
@@ -771,7 +771,7 @@ static void doStats(void)
     g_log << Logger::Notice << "stats: throttle map: "
           << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) << ", ns speeds: "
           << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize) << ", failed ns: "
-          << broadcastAccFunction<uint64_t>(pleaseGetFailedServersSize) << ", ednsmap: "
+          << SyncRes::getFailedServersSize() << ", ednsmap: "
           << broadcastAccFunction<uint64_t>(pleaseGetEDNSStatusesSize) << endl;
     g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries) << "%";
     g_log << Logger::Notice << ", " << ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries) << "% throttled" << endl;
index c36f6534636cb41ea72e3b9c18dd88f3b1beab84..62d62a2c06527f67a32e63c2313079331b4215e2 100644 (file)
@@ -537,7 +537,8 @@ uint64_t SyncRes::doDumpFailedServers(int fd)
   fprintf(fp.get(), "; remote IP\tcount\ttimestamp\n");
   uint64_t count=0;
 
-  for(const auto& i : s_fails.lock()->getMap())
+  // We get a copy, so the I/O does not need to happen while holding the lock
+  for (const auto& i : s_fails.lock()->getMapCopy())
   {
     count++;
     char tmp[26];
@@ -563,7 +564,8 @@ uint64_t SyncRes::doDumpNonResolvingNS(int fd)
   fprintf(fp.get(), "; name\tcount\ttimestamp\n");
   uint64_t count=0;
 
-  for(const auto& i : s_nonresolving.lock()->getMap())
+  // We get a copy, so the I/O does not need to happen while holding the lock
+  for (const auto& i : s_nonresolving.lock()->getMapCopy())
   {
     count++;
     char tmp[26];
index 7f081d904b2525c868f0bc5718e41e1341935cd5..9ffcebfd1ed13e514f6169e38024bd246f48af82 100644 (file)
@@ -210,9 +210,10 @@ public:
                                   ordered_non_unique<tag<time_t>, member<value_t, time_t, &value_t::last>>
                                   >> cont_t;
 
-  const cont_t& getMap() const {
+  cont_t getMapCopy() const {
     return d_cont;
   }
+
   counter_t value(const T& t) const
   {
     auto i = d_cont.find(t);
index de4b6e5fec961686374860b3beb939592f97167e..3c31fc90af1a0387e68241bb176b99d004b39623 100644 (file)
@@ -690,8 +690,11 @@ const std::map<std::string, MetricDefinition> MetricDefinitionStorage::d_metrics
    MetricDefinition(PrometheusMetricType::counter,
                     "Number of servers that sent an invalid EDNS PING response")},
   {"failed-host-entries",
-   MetricDefinition(PrometheusMetricType::counter,
-                    "Number of servers that failed to resolve")},
+   MetricDefinition(PrometheusMetricType::gauge,
+                    "Number of entries in the failed NS cache")},
+  {"non-resolving-ns-entries",
+   MetricDefinition(PrometheusMetricType::gauge,
+                    "Number of entries in the non-resolving NS name cache")},
   {"ignored-packets",
    MetricDefinition(PrometheusMetricType::counter,
                     "Number of non-query packets received on server sockets that should only get query packets")},