git.ipfire.org Git - thirdparty/pdns.git/commitdiff
rec: distinguish OS imposed limits from app imposed limits, specifically on chains 14554/head
author Otto Moerbeek <otto.moerbeek@open-xchange.com>
Tue, 13 Aug 2024 07:56:54 +0000 (09:56 +0200)
committer Otto Moerbeek <otto.moerbeek@open-xchange.com>
Tue, 13 Aug 2024 10:02:33 +0000 (12:02 +0200)
pdns/recursordist/RECURSOR-MIB.txt
pdns/recursordist/docs/metrics.rst
pdns/recursordist/lwres.hh
pdns/recursordist/pdns_recursor.cc
pdns/recursordist/rec-snmp.cc
pdns/recursordist/rec-tcounters.hh
pdns/recursordist/rec_channel_rec.cc
pdns/recursordist/syncres.cc
pdns/recursordist/ws-recursor.cc
regression-tests.recursor-dnssec/test_SNMP.py

index b64afadc994a081a94cf9bea00bd04e69afb3101..ba743382ddfb9da81b438eef71e5f49e62d4d4a5 100644 (file)
@@ -63,6 +63,9 @@ rec MODULE-IDENTITY
     REVISION "202405230000Z"
     DESCRIPTION "Added metrics for maximum chain length and weight"
 
+    REVISION "202408130000Z"
+    DESCRIPTION "Added metric for chain limits reached"
+
     ::= { powerdns 2 }
 
 powerdns               OBJECT IDENTIFIER ::= { enterprises 43315 }
@@ -1269,6 +1272,14 @@ maxChainWeight OBJECT-TYPE
         "Maximum chain weight"
     ::= { stats 150 }
 
+chainLimits OBJECT-TYPE
+    SYNTAX Counter64
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "Chain limits reached"
+    ::= { stats 151 }
+
 ---
 --- Traps / Notifications
 ---
@@ -1466,7 +1477,8 @@ recGroup OBJECT-GROUP
         nodEvents,
         udrEvents,
         maxChainLength,
-        maxChainWeight
+        maxChainWeight,
+        chainLimits
     }
     STATUS current
     DESCRIPTION "Objects conformance group for PowerDNS Recursor"
index 2f3e1f8ff7407ea6a20ee85f43ad084655c204c8..8f1fef53db7f41645d4f4fb4ff9debf725d2f261 100644 (file)
@@ -262,6 +262,10 @@ case-mismatches
 ^^^^^^^^^^^^^^^
 counts the number of mismatches in character   case since starting
 
+chain-limits
+^^^^^^^^^^^^
+counts the number of times a chain limit (size or age) has been hit
+
 chain-resends
 ^^^^^^^^^^^^^
 number of queries chained to existing outstanding   query
index 216d44c90cecd8464bc51f6146400445a7537247..58d43535e32c5e9540a991731a31c12ce82ec653 100644 (file)
@@ -69,9 +69,15 @@ public:
     Success = 1,
     PermanentError = 2 /* not transport related */,
     OSLimitError = 3,
-    Spoofed = 4 /* Spoofing attempt (too many near-misses) */
+    Spoofed = 4, /* Spoofing attempt (too many near-misses) */
+    ChainLimitError = 5,
   };
 
+  [[nodiscard]] static bool isLimitError(Result res) // predicate: does `res` denote one of the two "limit" results (OS-imposed or chain)?
+  {
+    return res == Result::OSLimitError || res == Result::ChainLimitError; // true for either limit kind, false for every other Result value
+  }
+
   vector<DNSRecord> d_records;
   int d_rcode{0};
   bool d_validpacket{false};
index 6f9b2839a1426936c8fd4bb673cce046eb6c441f..e636cf57a86f1deae8b5bf01fe85bd38d6a9a82c 100644 (file)
@@ -308,12 +308,12 @@ LWResult::Result asendto(const void* data, size_t len, int /* flags */,
         *fileDesc = -1; // gets used in waitEvent / sendEvent later on
         auto currentChainSize = chain.first->key->authReqChain.size();
         if (g_maxChainLength > 0 && currentChainSize >= g_maxChainLength) {
-          return LWResult::Result::OSLimitError;
+          return LWResult::Result::ChainLimitError;
         }
         assert(uSec(chain.first->key->creationTime) != 0); // NOLINT
         auto age = now - chain.first->key->creationTime;
         if (uSec(age) > static_cast<uint64_t>(1000) * authWaitTimeMSec(g_multiTasker) * 2 / 3) {
-          return LWResult::Result::OSLimitError;
+          return LWResult::Result::ChainLimitError;
         }
         chain.first->key->authReqChain.insert(qid); // we can chain
         auto maxLength = t_Counters.at(rec::Counter::maxChainLength);
index f433f38374249e893b1c5ddfd1aa780af39ee4d2..f6e91ec455d5d579472a55cf80350e6b1d5f3ea5 100644 (file)
@@ -205,6 +205,7 @@ static const oid10 nodEventsOID = {RECURSOR_STATS_OID, 147};
 static const oid10 udrEventsOID = {RECURSOR_STATS_OID, 148};
 static const oid10 maxChainLengthOID = {RECURSOR_STATS_OID, 149};
 static const oid10 maxChainWeightOID = {RECURSOR_STATS_OID, 150};
+static const oid10 chainLimitsOID = {RECURSOR_STATS_OID, 151};
 
 static std::unordered_map<oid, std::string> s_statsMap;
 
@@ -435,6 +436,7 @@ RecursorSNMPAgent::RecursorSNMPAgent(const std::string& name, const std::string&
   registerCounter64Stat("non-resolving-nameserver-entries", nonResolvingNameserverEntriesOID);
   registerCounter64Stat("maintenance-usec", maintenanceUSecOID);
   registerCounter64Stat("maintenance-calls", maintenanceCallsOID);
+  registerCounter64Stat("chain-limits", chainLimitsOID);
 
 #define RCODE(num) registerCounter64Stat("auth-" + RCode::to_short_s(num) + "-answers", rcode##num##AnswersOID) // NOLINT(cppcoreguidelines-macro-usage)
   RCODE(0);
index bc74b05d576b07ace1e99f00fd089addebd6661a..fdbce78d91cdd9689bf1462b6eb5a68db9c34a0a 100644 (file)
@@ -98,6 +98,7 @@ enum class Counter : uint8_t
   udrCount,
   maxChainLength,
   maxChainWeight,
+  chainLimits,
 
   numberOfCounters
 };
index a2b040cb339bd690ad864a808323772525737510..b46fed7684f97bc15ff696ce458a6259d19885fb 100644 (file)
@@ -1568,6 +1568,7 @@ static void registerAllStats1()
 
   addGetStat("max-chain-length", [] { return g_Counters.max(rec::Counter::maxChainLength); });
   addGetStat("max-chain-weight", [] { return g_Counters.max(rec::Counter::maxChainWeight); });
+  addGetStat("chain-limits", [] { return g_Counters.sum(rec::Counter::chainLimits); });
 
   /* make sure that the ECS stats are properly initialized */
   SyncRes::clearECSStats();
index 04589a29b3bd9506c1a3a0951e83a4b1699c0856..5f15cc173102822545d7e064ead484b08504aed1 100644 (file)
@@ -1609,7 +1609,7 @@ LWResult::Result SyncRes::asyncresolveWrapper(const ComboAddress& address, bool
       ret = asyncresolve(address, sendQname, type, doTCP, sendRDQuery, EDNSLevel, now, srcmask, ctx, d_outgoingProtobufServers, d_frameStreamServers, luaconfsLocal->outgoingProtobufExportConfig.exportTypes, res, chained);
     }
 
-    if (ret == LWResult::Result::PermanentError || ret == LWResult::Result::OSLimitError || ret == LWResult::Result::Spoofed) {
+    if (ret == LWResult::Result::PermanentError || LWResult::isLimitError(ret) || ret == LWResult::Result::Spoofed) {
       break; // transport error, nothing to learn here
     }
 
@@ -5477,6 +5477,11 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
       LOG(prefix << qname << ": Hit a local resource limit resolving" << (doTCP ? " over TCP" : "") << ", probable error: " << stringerror() << endl);
       t_Counters.at(rec::Counter::resourceLimits)++;
     }
+    else if (resolveret == LWResult::Result::ChainLimitError) {
+      /* Chain resource limit reached */
+      LOG(prefix << qname << ": Hit a chain limit resolving" << (doTCP ? " over TCP" : ""));
+      t_Counters.at(rec::Counter::chainLimits)++;
+    }
     else {
       /* LWResult::Result::PermanentError */
       t_Counters.at(rec::Counter::unreachables)++;
@@ -5487,7 +5492,7 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
 
     // don't account for resource limits, they are our own fault
     // And don't throttle when the IP address is on the dontThrottleNetmasks list or the name is part of dontThrottleNames
-    if (resolveret != LWResult::Result::OSLimitError && !chained && !dontThrottle) {
+    if (!LWResult::isLimitError(resolveret) && !chained && !dontThrottle) {
       uint32_t responseUsec = 1000000; // 1 sec for non-timeout cases
       // Use the actual time if we saw a timeout
       if (resolveret == LWResult::Result::Timeout) {
index 342d54c4308cdd3fa02151c218152954555a44aa..c2a745ac2d8a3d9e4b2df31429937dba3a452111 100644 (file)
@@ -1257,6 +1257,10 @@ const std::map<std::string, MetricDefinition> MetricDefinitionStorage::d_metrics
   {"max-chain-weight",
    MetricDefinition(PrometheusMetricType::counter,
                     "Maximum chain weight")},
+
+  {"chain-limits",
+   MetricDefinition(PrometheusMetricType::counter,
+                    "Chain limits reached")},
 };
 
 constexpr bool CHECK_PROMETHEUS_METRICS = false;
index baa087548ded03f0cd685b408938ed864591cbeb..27b31ffbe3ded196a7aab9f8e04bd7db5eaa5caf 100644 (file)
@@ -21,7 +21,7 @@ class TestSNMP(RecursorTest):
     """
 
     def _checkStatsValues(self, results):
-        count = 150
+        count = 151
         for i in list(range(1, count)):
             oid = self._snmpOID + '.1.' + str(i) + '.0'
             self.assertTrue(oid in results)