From: Otto Moerbeek Date: Mon, 22 Aug 2022 09:40:38 +0000 (+0200) Subject: rec: Keep time and count metrics when maintenance is called. X-Git-Tag: rec-4.8.0-alpha1~51^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=67daf77f705c5402920a0468a0b4430507fd1dc0;p=thirdparty%2Fpdns.git rec: Keep time and count metrics when maintenance is called. Fixes #6981 --- diff --git a/pdns/rec-snmp.cc b/pdns/rec-snmp.cc index d3eb3e4c4c..08b6c96129 100644 --- a/pdns/rec-snmp.cc +++ b/pdns/rec-snmp.cc @@ -147,6 +147,8 @@ static const oid udp6InCsumErrorsOID[] = {RECURSOR_STATS_OID, 123}; static const oid sourceDisallowedNotifyOID[] = {RECURSOR_STATS_OID, 124}; static const oid zoneDisallowedNotifyOID[] = {RECURSOR_STATS_OID, 125}; static const oid nonResolvingNameserverEntriesOID[] = {RECURSOR_STATS_OID, 126}; +static const oid maintenanceUSecOID[] = {RECURSOR_STATS_OID, 127}; +static const oid maintenanceCallsOID[] = {RECURSOR_STATS_OID, 128}; static std::unordered_map s_statsMap; @@ -384,5 +386,7 @@ RecursorSNMPAgent::RecursorSNMPAgent(const std::string& name, const std::string& registerCounter64Stat("almost-expired-run", almostExpiredRun, OID_LENGTH(almostExpiredRun)); registerCounter64Stat("almost-expired-exceptions", almostExpiredExceptions, OID_LENGTH(almostExpiredExceptions)); registerCounter64Stat("non-resolving-nameserver-entries", nonResolvingNameserverEntriesOID, OID_LENGTH(nonResolvingNameserverEntriesOID)); + registerCounter64Stat("maintenance-usec", maintenanceUSecOID, OID_LENGTH(maintenanceUSecOID)); + registerCounter64Stat("maintenance-calls", maintenanceCallsOID, OID_LENGTH(maintenanceCallsOID)); #endif /* HAVE_NET_SNMP */ } diff --git a/pdns/rec_channel_rec.cc b/pdns/rec_channel_rec.cc index 09219c1644..aa8449a7d3 100644 --- a/pdns/rec_channel_rec.cc +++ b/pdns/rec_channel_rec.cc @@ -1383,6 +1383,9 @@ static void registerAllStats1() addGetStat("idle-tcpout-connections", getCurrentIdleTCPConnections); + addGetStat("maintenance-usec", 
&g_stats.maintenanceUsec); + addGetStat("maintenance-calls", &g_stats.maintenanceCalls); + /* make sure that the ECS stats are properly initialized */ SyncRes::clearECSStats(); for (size_t idx = 0; idx < SyncRes::s_ecsResponsesBySubnetSize4.size(); idx++) { diff --git a/pdns/recursordist/RECURSOR-MIB.txt b/pdns/recursordist/RECURSOR-MIB.txt index 15be9c21f3..5f9552e16c 100644 --- a/pdns/recursordist/RECURSOR-MIB.txt +++ b/pdns/recursordist/RECURSOR-MIB.txt @@ -48,6 +48,9 @@ rec MODULE-IDENTITY REVISION "202201310000Z" DESCRIPTION "Added non-resolving NS name metric." + REVISION "202208220000Z" + DESCRIPTION "Added internal maintenance metrics." + ::= { powerdns 2 } powerdns OBJECT IDENTIFIER ::= { enterprises 43315 } @@ -1235,7 +1238,9 @@ recGroup OBJECT-GROUP udp6InCsumErrors, sourceDisallowedNotify, zoneDisallowedNotify, - nonResolvingNameserverEntries + nonResolvingNameserverEntries, + maintenanceUSec, + maintenanceCalls } STATUS current DESCRIPTION "Objects conformance group for PowerDNS Recursor" diff --git a/pdns/recursordist/docs/metrics.rst b/pdns/recursordist/docs/metrics.rst index 121dba2cdc..25c0e06598 100644 --- a/pdns/recursordist/docs/metrics.rst +++ b/pdns/recursordist/docs/metrics.rst @@ -512,6 +512,14 @@ ipv6-questions ^^^^^^^^^^^^^^ counts all end-user initiated queries with the RD bit set, received over IPv6 UDP +maintenance-usec +^^^^^^^^^^^^^^^^ +time spent doing internal maintenance, including Lua maintenance + +maintenance-calls +^^^^^^^^^^^^^^^^^ +number of times internal maintenance has been called, including Lua maintenance + malloc-bytes ^^^^^^^^^^^^ returns the number of bytes allocated by the process (broken, always returns 0) diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc index de718753b2..4b37c1e969 100644 --- a/pdns/recursordist/rec-main.cc +++ b/pdns/recursordist/rec-main.cc @@ -2452,7 +2452,15 @@ static void recursorThread() // Use primes, it avoid not being scheduled in cases where the counter has a 
regular pattern. // We want to call handler thread often, it gets scheduled about 2 times per second if (((threadInfo.isHandler() || threadInfo.isTaskThread()) && s_counter % 11 == 0) || s_counter % 499 == 0) { + struct timeval start; + Utility::gettimeofday(&start); MT->makeThread(houseKeeping, nullptr); + if (!threadInfo.isTaskThread()) { + struct timeval stop; + Utility::gettimeofday(&stop); + g_stats.maintenanceUsec += uSec(stop - start); + ++g_stats.maintenanceCalls; + } } if (!(s_counter % 55)) { @@ -2489,8 +2497,14 @@ static void recursorThread() if (threadInfo.isWorker() || threadInfo.isListener()) { // Only on threads processing queries if (g_now.tv_sec - last_lua_maintenance >= luaMaintenanceInterval) { + struct timeval start; + Utility::gettimeofday(&start); t_pdl->maintenance(); last_lua_maintenance = g_now.tv_sec; + struct timeval stop; + Utility::gettimeofday(&stop); + g_stats.maintenanceUsec += uSec(stop - start); + ++g_stats.maintenanceCalls; } } } diff --git a/pdns/syncres.hh b/pdns/syncres.hh index b1078b9739..e1ea08718f 100644 --- a/pdns/syncres.hh +++ b/pdns/syncres.hh @@ -794,6 +794,8 @@ struct RecursorStats pdns::stat_t proxyProtocolInvalidCount{0}; pdns::stat_t nodLookupsDroppedOversize{0}; pdns::stat_t dns64prefixanswers{0}; + pdns::stat_t maintenanceUsec{0}; + pdns::stat_t maintenanceCalls{0}; RecursorStats() : answers("answers", { 1000, 10000, 100000, 1000000 }), diff --git a/pdns/ws-recursor.cc b/pdns/ws-recursor.cc index 7160d604c1..e34ffc228f 100644 --- a/pdns/ws-recursor.cc +++ b/pdns/ws-recursor.cc @@ -1142,6 +1142,13 @@ const std::map MetricDefinitionStorage::d_metrics MetricDefinition(PrometheusMetricType::gauge, "Number of connections in the TCP idle outgoing connections pool")}, + {"maintenance-usec", + MetricDefinition(PrometheusMetricType::counter, + "Time spent doing internal maintenance, including Lua maintenance")}, + + {"maintenance-calls", + MetricDefinition(PrometheusMetricType::counter, + "Number of times internal 
maintenance has been called, including Lua maintenance")}, }; #define CHECK_PROMETHEUS_METRICS 0