From: Otto Moerbeek Date: Fri, 1 Nov 2024 10:38:03 +0000 (+0100) Subject: rec: periodic stats cleanup and rename outqueries-per-query to outqueries-per-query... X-Git-Tag: rec-5.2.0-alpha1~14^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8ba616ddfdaa90ae43da1e70e1accf3a3305eff0;p=thirdparty%2Fpdns.git rec: periodic stats cleanup and rename outqueries-per-query to outqueries-per-query-perc Also - cleans up two TCounters that where unused and/or redundant. - group the log lines more naturally - print doubles with two decimal places --- diff --git a/pdns/recursordist/docs/upgrade.rst b/pdns/recursordist/docs/upgrade.rst index 5d74e6fea8..0e06915630 100644 --- a/pdns/recursordist/docs/upgrade.rst +++ b/pdns/recursordist/docs/upgrade.rst @@ -22,6 +22,8 @@ If there are too many incoming TCP connections, new connections will be accepted Previously, excess connections would linger in the OS listen queue until timeout or until processing of incoming TCP connections resumed due to the number of connections being processed dropping below the limit. There is a new metric ``tcp-overflow`` that counts the connections closed immediately. +The ``outqueries-per-query`` value reported in the log by the periodic statistics function is now reported as ``outqueries-per-query-perc` as it is a percentage. +A value of 1 means that on average each 100 incoming queries lead to a single query to an authoritative server. 5.0.6 to 5.1.0 -------------- diff --git a/pdns/recursordist/pdns_recursor.cc b/pdns/recursordist/pdns_recursor.cc index e4fbfff51f..9bd39a7f54 100644 --- a/pdns/recursordist/pdns_recursor.cc +++ b/pdns/recursordist/pdns_recursor.cc @@ -2117,8 +2117,6 @@ bool checkForCacheHit(bool qnameParsed, unsigned int tag, const string& data, } } - t_Counters.at(rec::Counter::packetCacheHits)++; - t_Counters.at(rec::Counter::syncresqueries)++; // XXX if (response.length() >= sizeof(struct dnsheader)) { dnsheader_aligned dh_aligned(response.data()); ageDNSPacket(response, age, dh_aligned); diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc index 8175042ce6..55564926b6 100644 --- a/pdns/recursordist/rec-main.cc +++ b/pdns/recursordist/rec-main.cc @@ -1168,12 +1168,14 @@ static void loggerBackend(const Logging::Entry& entry) g_log << urg << buf.str() << endl; } -static int ratePercentage(uint64_t nom, uint64_t denom) +static std::string ratePercentage(uint64_t nom, uint64_t denom) { if (denom == 0) { - return 0; + return "0"; } - return static_cast(round(100.0 * static_cast(nom) / static_cast(denom))); + std::ostringstream str; + str << std::setprecision(2) << std::fixed << 100.0 * static_cast(nom) / static_cast(denom); + return str.str(); } static void doStats() @@ -1181,106 +1183,84 @@ static void doStats() static time_t lastOutputTime; static uint64_t lastQueryCount; - uint64_t cacheHits = g_recCache->getCacheHits(); - uint64_t cacheMisses = g_recCache->getCacheMisses(); - uint64_t cacheSize = g_recCache->size(); + auto cacheHits = g_recCache->getCacheHits(); + auto cacheMisses = g_recCache->getCacheMisses(); + auto cacheSize = g_recCache->size(); auto rc_stats = g_recCache->stats(); auto pc_stats = g_packetCache ? g_packetCache->stats() : std::pair{0, 0}; - double rrc = ratePercentage(rc_stats.first, rc_stats.second); - double rpc = ratePercentage(pc_stats.first, pc_stats.second); - uint64_t negCacheSize = g_negCache->size(); + auto rrc = ratePercentage(rc_stats.first, rc_stats.second); + auto rpc = ratePercentage(pc_stats.first, pc_stats.second); + auto negCacheSize = g_negCache->size(); auto taskPushes = getTaskPushes(); auto taskExpired = getTaskExpired(); auto taskSize = getTaskSize(); - uint64_t pcSize = g_packetCache ? g_packetCache->size() : 0; - uint64_t pcHits = g_packetCache ? g_packetCache->getHits() : 0; - - auto log = g_slog->withName("stats"); + auto pcSize = g_packetCache ? g_packetCache->size() : 0; + auto pcHits = g_packetCache ? g_packetCache->getHits() : 0; + auto pcMisses = g_packetCache ? g_packetCache->getMisses() : 0; auto qcounter = g_Counters.sum(rec::Counter::qcounter); - auto syncresqueries = g_Counters.sum(rec::Counter::syncresqueries); auto outqueries = g_Counters.sum(rec::Counter::outqueries); auto throttledqueries = g_Counters.sum(rec::Counter::throttledqueries); auto tcpoutqueries = g_Counters.sum(rec::Counter::tcpoutqueries); auto dotoutqueries = g_Counters.sum(rec::Counter::dotoutqueries); auto outgoingtimeouts = g_Counters.sum(rec::Counter::outgoingtimeouts); - if (qcounter > 0 && (cacheHits + cacheMisses) > 0 && syncresqueries > 0 && outqueries > 0) { - if (!g_slogStructured) { - g_log << Logger::Notice << "stats: " << qcounter << " questions, " << cacheSize << " cache entries, " << negCacheSize << " negative entries, " << ratePercentage(cacheHits, cacheHits + cacheMisses) << "% cache hits" << endl; - g_log << Logger::Notice << "stats: record cache contended/acquired " << rc_stats.first << '/' << rc_stats.second << " = " << rrc << '%' << endl; - g_log << Logger::Notice << "stats: packet cache contended/acquired " << pc_stats.first << '/' << pc_stats.second << " = " << rpc << '%' << endl; - - g_log << Logger::Notice << "stats: throttle map: " - << SyncRes::getThrottledServersSize() << ", ns speeds: " - << SyncRes::getNSSpeedsSize() << ", failed ns: " - << SyncRes::getFailedServersSize() << ", ednsmap: " - << SyncRes::getEDNSStatusesSize() << ", non-resolving: " - << SyncRes::getNonResolvingNSSize() << ", saved-parentsets: " - << SyncRes::getSaveParentsNSSetsSize() - << endl; - g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(outqueries, syncresqueries) << "%"; - g_log << Logger::Notice << ", " << ratePercentage(throttledqueries, outqueries + throttledqueries) << "% throttled" << endl; - g_log << Logger::Notice << "stats: " << tcpoutqueries << "/" << dotoutqueries << "/" << getCurrentIdleTCPConnections() << " outgoing tcp/dot/idle connections, " << broadcastAccFunction(pleaseGetConcurrentQueries) << " queries running, " << outgoingtimeouts << " outgoing timeouts " << endl; - - g_log << Logger::Notice << "stats: " << pcSize << " packet cache entries, " << ratePercentage(pcHits, qcounter) << "% packet cache hits" << endl; - - g_log << Logger::Notice << "stats: tasks pushed/expired/queuesize: " << taskPushes << '/' << taskExpired << '/' << taskSize << endl; - } - else { - const string report = "Periodic statistics report"; - log->info(Logr::Info, report, - "questions", Logging::Loggable(qcounter), - "cache-entries", Logging::Loggable(cacheSize), - "negcache-entries", Logging::Loggable(negCacheSize), - "record-cache-hitratio-perc", Logging::Loggable(ratePercentage(cacheHits, cacheHits + cacheMisses)), - "record-cache-contended", Logging::Loggable(rc_stats.first), - "record-cache-acquired", Logging::Loggable(rc_stats.second), - "record-cache-contended-perc", Logging::Loggable(rrc), - "packetcache-contended", Logging::Loggable(pc_stats.first), - "packetcache-acquired", Logging::Loggable(pc_stats.second), - "packetcache-contended-perc", Logging::Loggable(rpc)); - log->info(Logr::Info, report, - "throttle-entries", Logging::Loggable(SyncRes::getThrottledServersSize()), - "nsspeed-entries", Logging::Loggable(SyncRes::getNSSpeedsSize()), - "failed-host-entries", Logging::Loggable(SyncRes::getFailedServersSize()), - "edns-entries", Logging::Loggable(SyncRes::getEDNSStatusesSize()), - "non-resolving-nameserver-entries", Logging::Loggable(SyncRes::getNonResolvingNSSize()), - "saved-parent-ns-sets-entries", Logging::Loggable(SyncRes::getSaveParentsNSSetsSize()), - "outqueries-per-query", Logging::Loggable(ratePercentage(outqueries, syncresqueries))); - log->info(Logr::Info, report, - "throttled-queries-perc", Logging::Loggable(ratePercentage(throttledqueries, outqueries + throttledqueries)), - "tcp-outqueries", Logging::Loggable(tcpoutqueries), - "dot-outqueries", Logging::Loggable(dotoutqueries), - "idle-tcpout-connections", Logging::Loggable(getCurrentIdleTCPConnections()), - "concurrent-queries", Logging::Loggable(broadcastAccFunction(pleaseGetConcurrentQueries)), - "outgoing-timeouts", Logging::Loggable(outgoingtimeouts)); - log->info(Logr::Info, report, - "packetcache-entries", Logging::Loggable(pcSize), - "packetcache-hitratio-perc", Logging::Loggable(ratePercentage(pcHits, qcounter)), - "taskqueue-pushed", Logging::Loggable(taskPushes), - "taskqueue-expired", Logging::Loggable(taskExpired), - "taskqueue-size", Logging::Loggable(taskSize)); - } + + auto log = g_slog->withName("stats"); + + if (qcounter > 0) { + const string report = "Periodic statistics report"; + log->info(Logr::Info, report, + "questions", Logging::Loggable(qcounter), + "cache-entries", Logging::Loggable(cacheSize), + "negcache-entries", Logging::Loggable(negCacheSize), + "record-cache-hitratio-perc", Logging::Loggable(ratePercentage(cacheHits, cacheHits + cacheMisses)), + "record-cache-contended", Logging::Loggable(rc_stats.first), + "record-cache-acquired", Logging::Loggable(rc_stats.second), + "record-cache-contended-perc", Logging::Loggable(rrc)); + log->info(Logr::Info, report, + "packetcache-contended", Logging::Loggable(pc_stats.first), + "packetcache-acquired", Logging::Loggable(pc_stats.second), + "packetcache-contended-perc", Logging::Loggable(rpc), + "packetcache-entries", Logging::Loggable(pcSize), + "packetcache-hitratio-perc", Logging::Loggable(ratePercentage(pcHits, pcHits + pcMisses))); + log->info(Logr::Info, report, + "throttle-entries", Logging::Loggable(SyncRes::getThrottledServersSize()), + "nsspeed-entries", Logging::Loggable(SyncRes::getNSSpeedsSize()), + "failed-host-entries", Logging::Loggable(SyncRes::getFailedServersSize()), + "edns-entries", Logging::Loggable(SyncRes::getEDNSStatusesSize()), + "non-resolving-nameserver-entries", Logging::Loggable(SyncRes::getNonResolvingNSSize()), + "saved-parent-ns-sets-entries", Logging::Loggable(SyncRes::getSaveParentsNSSetsSize())); + log->info(Logr::Info, report, + "throttled-queries-perc", Logging::Loggable(ratePercentage(throttledqueries, outqueries + throttledqueries)), + "outqueries", Logging::Loggable(outqueries), + "tcp-outqueries", Logging::Loggable(tcpoutqueries), + "dot-outqueries", Logging::Loggable(dotoutqueries), + "idle-tcpout-connections", Logging::Loggable(getCurrentIdleTCPConnections()), + "concurrent-queries", Logging::Loggable(broadcastAccFunction(pleaseGetConcurrentQueries)), + "outgoing-timeouts", Logging::Loggable(outgoingtimeouts), + "outqueries-per-query-perc", Logging::Loggable(ratePercentage(outqueries, qcounter))); + log->info(Logr::Info, report, + "taskqueue-pushed", Logging::Loggable(taskPushes), + "taskqueue-expired", Logging::Loggable(taskExpired), + "taskqueue-size", Logging::Loggable(taskSize)); + size_t idx = 0; for (const auto& threadInfo : RecThreadInfo::infos()) { if (threadInfo.isWorker()) { - SLOG(g_log << Logger::Notice << "stats: thread " << idx << " has been distributed " << threadInfo.getNumberOfDistributedQueries() << " queries" << endl, - log->info(Logr::Info, "Queries handled by thread", "thread", Logging::Loggable(idx), "tname", Logging::Loggable(threadInfo.getName()), "count", Logging::Loggable(threadInfo.getNumberOfDistributedQueries()))); + log->info(Logr::Info, "Queries handled by thread", "thread", Logging::Loggable(idx), "tname", Logging::Loggable(threadInfo.getName()), "count", Logging::Loggable(threadInfo.getNumberOfDistributedQueries())); ++idx; } } time_t now = time(nullptr); if (lastOutputTime != 0 && lastQueryCount != 0 && now != lastOutputTime) { - SLOG(g_log << Logger::Notice << "stats: " << (qcounter - lastQueryCount) / (now - lastOutputTime) << " qps (average over " << (now - lastOutputTime) << " seconds)" << endl, - log->info(Logr::Info, "Periodic QPS report", "qps", Logging::Loggable((qcounter - lastQueryCount) / (now - lastOutputTime)), - "averagedOver", Logging::Loggable(now - lastOutputTime))); + log->info(Logr::Info, "Periodic QPS report", "qps", Logging::Loggable((qcounter - lastQueryCount) / (now - lastOutputTime)), + "averagedOver", Logging::Loggable(now - lastOutputTime)); } lastOutputTime = now; lastQueryCount = qcounter; } else if (statsWanted) { - SLOG(g_log << Logger::Notice << "stats: no stats yet!" << endl, - log->info(Logr::Notice, "No stats yet")); + log->info(Logr::Notice, "No stats yet"); } statsWanted = false; diff --git a/pdns/recursordist/rec-tcounters.hh b/pdns/recursordist/rec-tcounters.hh index a696c90b56..80f7e41353 100644 --- a/pdns/recursordist/rec-tcounters.hh +++ b/pdns/recursordist/rec-tcounters.hh @@ -37,7 +37,6 @@ namespace rec // Simple counters enum class Counter : uint8_t { - syncresqueries, outgoingtimeouts, outgoing4timeouts, outgoing6timeouts, @@ -79,7 +78,6 @@ enum class Counter : uint8_t ednsPingMismatches, noPingOutQueries, noEdnsOutQueries, - packetCacheHits, noPacketError, ignoredCount, emptyQueriesCount, diff --git a/pdns/recursordist/syncres.cc b/pdns/recursordist/syncres.cc index 7fe0c309dd..753ed5a553 100644 --- a/pdns/recursordist/syncres.cc +++ b/pdns/recursordist/syncres.cc @@ -764,7 +764,6 @@ bool SyncRes::addAdditionals(QType qtype, vector& ret, unsigned int d int SyncRes::beginResolve(const DNSName& qname, const QType qtype, QClass qclass, vector& ret, unsigned int depth) { d_eventTrace.add(RecEventTrace::SyncRes); - t_Counters.at(rec::Counter::syncresqueries)++; d_wasVariable = false; d_wasOutOfBand = false; d_cutStates.clear();