From: Otto Moerbeek Date: Wed, 28 Aug 2024 09:32:20 +0000 (+0200) Subject: Add metrics X-Git-Tag: rec-5.2.0-alpha1~101^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fb551ae58841954abee6d9aef44af3ceb4806be2;p=thirdparty%2Fpdns.git Add metrics --- diff --git a/pdns/recursordist/RECURSOR-MIB.txt b/pdns/recursordist/RECURSOR-MIB.txt index ba743382dd..7103ead6ff 100644 --- a/pdns/recursordist/RECURSOR-MIB.txt +++ b/pdns/recursordist/RECURSOR-MIB.txt @@ -66,6 +66,9 @@ rec MODULE-IDENTITY REVISION "202408130000Z" DESCRIPTION "Added metric for chain limits reached" + REVISION "202408280000Z" + DESCRIPTION "Added metric for too many incoming TCP connections" + ::= { powerdns 2 } powerdns OBJECT IDENTIFIER ::= { enterprises 43315 } @@ -1280,6 +1283,14 @@ chainLimits OBJECT-TYPE "Chain limits reached" ::= { stats 151 } +tcpOverflow OBJECT-TYPE + SYNTAX Counter64 + MAX-ACCESS read-only + STATUS current + DESCRIPTION + "Incoming TCP limits reached" + ::= { stats 152 } + --- --- Traps / Notifications --- @@ -1478,7 +1489,8 @@ recGroup OBJECT-GROUP udrEvents, maxChainLength, maxChainWeight, - chainLimits + chainLimits, + tcpOverflow } STATUS current DESCRIPTION "Objects conformance group for PowerDNS Recursor" diff --git a/pdns/recursordist/docs/metrics.rst b/pdns/recursordist/docs/metrics.rst index 8f1fef53db..07811ce8ab 100644 --- a/pdns/recursordist/docs/metrics.rst +++ b/pdns/recursordist/docs/metrics.rst @@ -756,6 +756,12 @@ taskqueue-size number of tasks currently in the taskqueues +.. _stat-tcp-overflow: + +tcp-overflow +^^^^^^^^^^^^ +number of times an incoming TCP connection was closed immediately because there were too many open connections already + .. _stat-tcp-client-overflow: tcp-client-overflow diff --git a/pdns/recursordist/rec-snmp.cc b/pdns/recursordist/rec-snmp.cc index f6e91ec455..5825defd30 100644 --- a/pdns/recursordist/rec-snmp.cc +++ b/pdns/recursordist/rec-snmp.cc @@ -206,6 +206,7 @@ static const oid10 udrEventsOID = {RECURSOR_STATS_OID, 148}; static const oid10 maxChainLengthOID = {RECURSOR_STATS_OID, 149}; static const oid10 maxChainWeightOID = {RECURSOR_STATS_OID, 150}; static const oid10 chainLimitsOID = {RECURSOR_STATS_OID, 151}; +static const oid10 tcpOverflowOID = {RECURSOR_STATS_OID, 152}; static std::unordered_map s_statsMap; @@ -462,6 +463,7 @@ RecursorSNMPAgent::RecursorSNMPAgent(const std::string& name, const std::string& registerCounter64Stat("udr-events", udrEventsOID); registerCounter64Stat("max-chain-length", maxChainLengthOID); registerCounter64Stat("max-chain-weight", maxChainWeightOID); + registerCounter64Stat("tcp-overflow", tcpOverflowOID); #endif /* HAVE_NET_SNMP */ } diff --git a/pdns/recursordist/rec-tcounters.hh b/pdns/recursordist/rec-tcounters.hh index fdbce78d91..a696c90b56 100644 --- a/pdns/recursordist/rec-tcounters.hh +++ b/pdns/recursordist/rec-tcounters.hh @@ -60,6 +60,7 @@ enum class Counter : uint8_t sourceDisallowedNotify, // when this is increased, qcounter is also zoneDisallowedNotify, // when this is increased, qcounter is also policyDrops, + tcpOverflow, tcpClientOverflow, clientParseError, serverParseError, diff --git a/pdns/recursordist/rec-tcp.cc b/pdns/recursordist/rec-tcp.cc index 353550d292..ef44a55a77 100644 --- a/pdns/recursordist/rec-tcp.cc +++ b/pdns/recursordist/rec-tcp.cc @@ -694,9 +694,12 @@ void handleNewTCPQuestion(int fileDesc, [[maybe_unused]] FDMultiplexer::funcpara if (newsock < 0) { return; } - auto closeSock = [newsock](const string& msg) { + auto closeSock = [newsock](rec::Counter cnt, const string& msg) { try { closesocket(newsock); + t_Counters.at(cnt)++; + // We want this bump to percolate up without too much delay + t_Counters.updateSnap(false); } catch (const PDNSException& e) { g_slogtcpin->error(Logr::Error, e.reason, msg, "exception", Logging::Loggable("PDNSException")); @@ -704,13 +707,11 @@ void handleNewTCPQuestion(int fileDesc, [[maybe_unused]] FDMultiplexer::funcpara }; if (TCPConnection::getCurrentConnections() >= g_maxTCPClients) { - t_Counters.at(rec::Counter::tcpClientOverflow)++; - closeSock("Error closing TCP socket after an overflow drop"); + closeSock(rec::Counter::tcpOverflow, "Error closing TCP socket after an overflow drop"); return; } if (g_multiTasker->numProcesses() >= g_maxMThreads) { - t_Counters.at(rec::Counter::overCapacityDrops)++; - closeSock("Error closing TCP socket after an over capacity drop"); + closeSock(rec::Counter::overCapacityDrops, "Error closing TCP socket after an over capacity drop"); return; } @@ -733,14 +734,12 @@ void handleNewTCPQuestion(int fileDesc, [[maybe_unused]] FDMultiplexer::funcpara SLOG(g_log << Logger::Error << "[" << g_multiTasker->getTid() << "] dropping TCP query from " << mappedSource.toString() << ", address neither matched by allow-from nor proxy-protocol-from" << endl, g_slogtcpin->info(Logr::Error, "dropping TCP query address neither matched by allow-from nor proxy-protocol-from", "source", Logging::Loggable(mappedSource))); } - t_Counters.at(rec::Counter::unauthorizedTCP)++; - closeSock("Error closing TCP socket after an ACL drop"); + closeSock(rec::Counter::unauthorizedTCP, "Error closing TCP socket after an ACL drop"); return; } if (g_maxTCPPerClient > 0 && t_tcpClientCounts->count(addr) > 0 && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) { - t_Counters.at(rec::Counter::tcpClientOverflow)++; - closeSock("Error closing TCP socket after a client overflow drop"); + closeSock(rec::Counter::tcpClientOverflow, "Error closing TCP socket after a client overflow drop"); return; } diff --git a/pdns/recursordist/rec_channel_rec.cc b/pdns/recursordist/rec_channel_rec.cc index b46fed7684..f4eb031a91 100644 --- a/pdns/recursordist/rec_channel_rec.cc +++ b/pdns/recursordist/rec_channel_rec.cc @@ -1349,6 +1349,7 @@ static void registerAllStats1() addGetStat("unauthorized-tcp", [] { return g_Counters.sum(rec::Counter::unauthorizedTCP); }); addGetStat("source-disallowed-notify", [] { return g_Counters.sum(rec::Counter::sourceDisallowedNotify); }); addGetStat("zone-disallowed-notify", [] { return g_Counters.sum(rec::Counter::zoneDisallowedNotify); }); + addGetStat("tcp-overflow", [] { return g_Counters.sum(rec::Counter::tcpOverflow); }); addGetStat("tcp-client-overflow", [] { return g_Counters.sum(rec::Counter::tcpClientOverflow); }); addGetStat("client-parse-errors", [] { return g_Counters.sum(rec::Counter::clientParseError); }); diff --git a/pdns/recursordist/ws-recursor.cc b/pdns/recursordist/ws-recursor.cc index c2a745ac2d..54f4773ff4 100644 --- a/pdns/recursordist/ws-recursor.cc +++ b/pdns/recursordist/ws-recursor.cc @@ -894,6 +894,9 @@ const std::map MetricDefinitionStorage::d_metrics {"tcp-client-overflow", MetricDefinition(PrometheusMetricType::counter, "Number of times an IP address was denied TCP access because it already had too many connections")}, + {"tcp-overflow", + MetricDefinition(PrometheusMetricType::counter, + "Number of times a TCP connection was denied access because too many connections")}, {"tcp-clients", MetricDefinition(PrometheusMetricType::gauge, "Number of currently active TCP/IP clients")}, diff --git a/regression-tests.recursor-dnssec/test_SNMP.py b/regression-tests.recursor-dnssec/test_SNMP.py index a386519554..40907ad3ab 100644 --- a/regression-tests.recursor-dnssec/test_SNMP.py +++ b/regression-tests.recursor-dnssec/test_SNMP.py @@ -21,7 +21,7 @@ class SNMPTest(RecursorTest): """ def _checkStatsValues(self, results): - count = 151 + count = 152 for i in list(range(1, count)): oid = self._snmpOID + '.1.' + str(i) + '.0' self.assertTrue(oid in results)