]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
Introducing TCounters
authorOtto Moerbeek <otto.moerbeek@open-xchange.com>
Tue, 8 Nov 2022 10:10:54 +0000 (11:10 +0100)
committerOtto Moerbeek <otto.moerbeek@open-xchange.com>
Mon, 12 Dec 2022 12:14:02 +0000 (13:14 +0100)
This is a mostly lockless (and not using atomics) way to keep track of
counters and other metrics.

Atomic value are more expensive than you would think (especially if
your platform has no native atomic support for your data type), and
using locking all the time for often updated counters is very
expensive as well.

The idea for `TCounters` is based on

https://github.com/ahupowerdns/mcounter

But addresses the issues raised in
https://github.com/ahupowerdns/mcounter/issues/3

Templates are used, the application has to provide a specific class to
hold the values and enums to index these values.  The application
specific class also has to provide a `merge()` method to merge two
instances of the application specific data.  For counters that is
simple: just add them. Averages (or histogrfam) requires a bit more
work. This is demonstrated in `rec-tcounters.{cc,hh}`

At the end of a body of work the application's threads should call the
`updateSnap()` function. If a certain amount of time has passed since
the last time, a thread local snapshot of the thread local data will
be created in a thread-safe way.

The class that collects the aggregated values reads (also in a thread
safe way) from the snapshot values in each thread.

Updates of individual counters are done on thread-local data,
potentially many times per second. The snaps contain a consistent set
of the values and are taken by default once per 100ms, so reletively
seldom.

By using the snap mnechanism the aggragate values computed are based
on internally consistent counter values (as long as related counters
are updated from the same thread). A (small) drawback is that the
values computed might be a bit out of date.

The snapshot approach was suggested by @wojas.

This PR de demonstrates `TCounters` for a few Recursor metrics: simple
counters and double typed average values. For the latter weights are
kept, so that the average of averages can be computed in a proper way.

21 files changed:
pdns/histogram.hh
pdns/lwres.cc
pdns/pdns_recursor.cc
pdns/rec_channel_rec.cc
pdns/recursordist/Makefile.am
pdns/recursordist/rec-main.cc
pdns/recursordist/rec-main.hh
pdns/recursordist/rec-tcounters.cc [new file with mode: 0644]
pdns/recursordist/rec-tcounters.hh [new file with mode: 0644]
pdns/recursordist/rec-tcp.cc
pdns/recursordist/tcounters.hh [new symlink]
pdns/recursordist/test-rec-tcounters_cc.cc [new file with mode: 0644]
pdns/recursordist/test-syncres_cc.cc
pdns/syncres.cc
pdns/syncres.hh
pdns/tcounters.hh [new file with mode: 0644]
regression-tests.recursor-dnssec/recursortests.py
regression-tests.recursor-dnssec/test_AggressiveNSECCache.py
regression-tests.recursor-dnssec/test_RootNXTrust.py
regression-tests.recursor-dnssec/test_SimpleDoT.py
regression-tests.recursor-dnssec/test_SimpleForwardOverDoT.py

index 7cfbcac343e27905ea03b6f9f69822c977903efc..931841eb5d60fdc09ff87fabc1b5baa58aa1cb83 100644 (file)
@@ -21,6 +21,7 @@
  */
 #pragma once
 
+#include <cassert>
 #include <algorithm>
 #include <limits>
 #include <stdexcept>
@@ -35,8 +36,10 @@ namespace pdns
 // By convention, we are using microsecond units
 struct Bucket
 {
-  const std::string d_name;
-  const uint64_t d_boundary{0};
+  Bucket(std::string name, uint64_t boundary, uint64_t val) :
+    d_name(std::move(name)), d_boundary(boundary), d_count(val) {}
+  std::string d_name;
+  uint64_t d_boundary{0};
   mutable uint64_t d_count{0};
 };
 
@@ -147,9 +150,22 @@ public:
     d_sum += d;
   }
 
+  BaseHistogram& operator+=(const BaseHistogram& rhs)
+  {
+    assert(d_name == rhs.d_name);
+    assert(d_buckets.size() == rhs.d_buckets.size());
+    for (size_t bucket = 0; bucket < d_buckets.size(); ++bucket) {
+      assert(d_buckets[bucket].d_name == rhs.d_buckets[bucket].d_name);
+      assert(d_buckets[bucket].d_boundary == rhs.d_buckets[bucket].d_boundary);
+      d_buckets[bucket].d_count += rhs.d_buckets[bucket].d_count;
+    }
+    d_sum += rhs.d_sum;
+    return *this;
+  }
+
 private:
   std::vector<B> d_buckets;
-  const std::string d_name;
+  std::string d_name;
   mutable SumType d_sum{0};
 
   std::vector<uint64_t> to125(uint64_t start, int num)
index 7cc80f4e5961c58379e3dea697db168fe496369c..aa1c7889c159d80ad985180e2ba3b60216f714a0 100644 (file)
@@ -466,7 +466,7 @@ static LWResult::Result asyncresolve(const ComboAddress& ip, const DNSName& doma
   if(!doTCP) {
     int queryfd;
     if (ip.sin4.sin_family==AF_INET6) {
-      g_stats.ipv6queries++;
+      t_Counters.at(rec::Counter::ipv6queries)++;
     }
 
     ret = asendto((const char*)&*vpacket.begin(), vpacket.size(), 0, ip, qid, domain, type, &queryfd);
@@ -624,7 +624,7 @@ static LWResult::Result asyncresolve(const ComboAddress& ip, const DNSName& doma
 
     lwr->d_rcode = RCode::FormErr;
     lwr->d_validpacket = false;
-    g_stats.serverParseError++;
+    t_Counters.at(rec::Counter::serverParseError)++;
 
     if(outgoingLoggers) {
       logIncomingResponse(outgoingLoggers, context ? context->d_initialRequestId : boost::none, uuid, ip, domain, type, qid, doTCP, dnsOverTLS, srcmask, len, lwr->d_rcode, lwr->d_records, queryTime, exportTypes);
@@ -637,7 +637,7 @@ static LWResult::Result asyncresolve(const ComboAddress& ip, const DNSName& doma
          g_slogout->info(Logr::Notice, "Unknown error parsing packet from remote server", "server", Logging::Loggable(ip)));
   }
 
-  g_stats.serverParseError++;
+  t_Counters.at(rec::Counter::serverParseError)++;
 
  out:
   if (!lwr->d_rcode) {
index ee826d784da70d37efe5adc2a3f7d1aac880fba5..b88c1987e3846efbf924c2e088f7271d415803c8 100644 (file)
@@ -338,7 +338,7 @@ LWResult::Result arecvfrom(PacketBuffer& packet, int flags, const ComboAddress&
                              "nearmisslimit", Logging::Loggable(nearMissLimit),
                              "qname", Logging::Loggable(domain),
                              "from", Logging::Loggable(fromaddr)));
-      g_stats.spoofCount++;
+      t_Counters.at(rec::Counter::spoofCount)++;
       return LWResult::Result::Spoofed;
     }
 
@@ -366,13 +366,13 @@ static void updateResponseStats(int res, const ComboAddress& remote, unsigned in
       if (query && t_servfailqueryring) // packet cache
         t_servfailqueryring->push_back({*query, qtype});
     }
-    g_stats.servFails++;
+    ++t_Counters.at(rec::Counter::servFails);
     break;
   case RCode::NXDomain:
-    g_stats.nxDomains++;
+    ++t_Counters.at(rec::Counter::nxDomains);
     break;
   case RCode::NoError:
-    g_stats.noErrors++;
+    t_Counters.at(rec::Counter::noErrors)++;
     break;
   }
 }
@@ -505,7 +505,7 @@ static PolicyResult handlePolicyHit(const DNSFilterEngine::Policy& appliedPolicy
 
   case DNSFilterEngine::PolicyKind::Drop:
     tcpGuard.setDropOnIdle();
-    ++g_stats.policyDrops;
+    ++t_Counters.at(rec::Counter::policyDrops);
     return PolicyResult::Drop;
 
   case DNSFilterEngine::PolicyKind::NXDOMAIN:
@@ -594,7 +594,7 @@ static void sendNODLookup(Logr::log_t nodlogger, const DNSName& dname)
       if (g_logCommonErrors) {
         nodlogger->v(10)->error(Logr::Error, "DNSName too long", "Unable to send NOD lookup");
       }
-      ++g_stats.nodLookupsDroppedOversize;
+      ++t_Counters.at(rec::Counter::nodLookupsDroppedOversize);
       return;
     }
     nodlogger->v(10)->info(Logr::Debug, "Sending NOD lookup", "nodqname", Logging::Loggable(qname));
@@ -721,7 +721,7 @@ int getFakeAAAARecords(const DNSName& qname, ComboAddress prefix, vector<DNSReco
                 }),
               ret.end());
   }
-  g_stats.dns64prefixanswers++;
+  t_Counters.at(rec::Counter::dns64prefixanswers)++;
   return rcode;
 }
 
@@ -752,7 +752,7 @@ int getFakePTRRecords(const DNSName& qname, vector<DNSRecord>& ret)
   auto log = g_slog->withName("dns64")->withValues("method", Logging::Loggable("getPTR"));
   int rcode = directResolve(DNSName(newquery), QType::PTR, QClass::IN, ret, t_pdl, log);
 
-  g_stats.dns64prefixanswers++;
+  t_Counters.at(rec::Counter::dns64prefixanswers)++;
   return rcode;
 }
 
@@ -952,14 +952,14 @@ void startDoResolve(void* p)
       // Does the requestor want DNSSEC records?
       if (edo.d_extFlags & EDNSOpts::DNSSECOK) {
         DNSSECOK = true;
-        g_stats.dnssecQueries++;
+        t_Counters.at(rec::Counter::dnssecQueries)++;
       }
       if (dc->d_mdp.d_header.cd) {
         /* Per rfc6840 section 5.9, "When processing a request with
            the Checking Disabled (CD) bit set, a resolver SHOULD attempt
            to return all response data, even data that has failed DNSSEC
            validation. */
-        ++g_stats.dnssecCheckDisabledQueries;
+        ++t_Counters.at(rec::Counter::dnssecCheckDisabledQueries);
       }
       if (dc->d_mdp.d_header.ad) {
         /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
@@ -967,7 +967,7 @@ void startDoResolve(void* p)
            value of the AD bit in the response.  This allows a requester to
            indicate that it understands the AD bit without also requesting
            DNSSEC data via the DO bit. */
-        ++g_stats.dnssecAuthenticDataQueries;
+        ++t_Counters.at(rec::Counter::dnssecAuthenticDataQueries);
       }
     }
     else {
@@ -1165,7 +1165,7 @@ void startDoResolve(void* p)
       }
       catch (const ImmediateQueryDropException& e) {
         // XXX We need to export a protobuf message (and do a NOD lookup) if requested!
-        g_stats.policyDrops++;
+        t_Counters.at(rec::Counter::policyDrops)++;
         SLOG(g_log << Logger::Debug << "Dropping query because of a filtering policy " << makeLoginfo(dc) << endl,
              sr.d_slog->info(Logr::Debug, "Dropping query because of a filtering policy"));
         return;
@@ -1286,7 +1286,7 @@ void startDoResolve(void* p)
     if (res == -1) {
       pw.getHeader()->rcode = RCode::ServFail;
       // no commit here, because no record
-      g_stats.servFails++;
+      ++t_Counters.at(rec::Counter::servFails);
     }
     else {
       pw.getHeader()->rcode = res;
@@ -1584,7 +1584,7 @@ void startDoResolve(void* p)
 #endif /* NOD_ENABLED */
 
     if (variableAnswer || sr.wasVariable()) {
-      g_stats.variableResponses++;
+      t_Counters.at(rec::Counter::variableResponses)++;
     }
 
     if (t_protobufServers.servers && !(luaconfsLocal->protobufExportConfig.taggedOnly && appliedPolicy.getName().empty() && dc->d_policyTags.empty())) {
@@ -1753,19 +1753,19 @@ void startDoResolve(void* p)
       }
     }
 
-    g_stats.answers(spentUsec);
-    g_stats.cumulativeAnswers(spentUsec);
+    t_Counters.at(rec::Histogram::answers)(spentUsec);
+    t_Counters.at(rec::Histogram::cumulativeAnswers)(spentUsec);
 
     double newLat = spentUsec;
     newLat = min(newLat, g_networkTimeoutMsec * 1000.0); // outliers of several minutes exist..
-    g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + newLat / g_latencyStatSize;
+    t_Counters.at(rec::DoubleWAvgCounter::avgLatencyUsec).addToRollingAvg(newLat, g_latencyStatSize);
     // no worries, we do this for packet cache hits elsewhere
 
     if (spentUsec >= sr.d_totUsec) {
       uint64_t ourtime = spentUsec - sr.d_totUsec;
-      g_stats.ourtime(ourtime);
+      t_Counters.at(rec::Histogram::ourtime)(ourtime);
       newLat = ourtime; // usec
-      g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + newLat / g_latencyStatSize;
+      t_Counters.at(rec::DoubleWAvgCounter::avgLatencyOursUsec).addToRollingAvg(newLat, g_latencyStatSize);
     }
 
 #ifdef NOD_ENABLED
@@ -1807,9 +1807,13 @@ void startDoResolve(void* p)
          sr.d_slog->info(Logr::Error, "Any other exception in a resolver context"));
   }
 
+  if (g_regressionTestMode) {
+    t_Counters.updateSnap(g_regressionTestMode);
+  }
   runTaskOnce(g_logCommonErrors);
 
-  g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage.load());
+  t_Counters.at(rec::Counter::maxMThreadStackUsage) = max(MT->getMaxStackUsage(), t_Counters.at(rec::Counter::maxMThreadStackUsage));
+  t_Counters.updateSnap(g_regressionTestMode);
 }
 
 void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
@@ -1916,19 +1920,20 @@ bool checkForCacheHit(bool qnameParsed, unsigned int tag, const string& data,
       }
     }
 
-    g_stats.packetCacheHits++;
-    SyncRes::s_queries++;
+    t_Counters.at(rec::Counter::packetCacheHits)++;
+    t_Counters.at(rec::Counter::syncresqueries)++; // XXX
     ageDNSPacket(response, age);
     if (response.length() >= sizeof(struct dnsheader)) {
       const struct dnsheader* dh = reinterpret_cast<const dnsheader*>(response.data());
       updateResponseStats(dh->rcode, source, response.length(), 0, 0);
     }
-    g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
-    g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
+    // we assume 0 usec
+    t_Counters.at(rec::DoubleWAvgCounter::avgLatencyUsec).addToRollingAvg(0.0, g_latencyStatSize);
+    t_Counters.at(rec::DoubleWAvgCounter::avgLatencyOursUsec).addToRollingAvg(0.0, g_latencyStatSize);
 #if 0
     // XXX changes behaviour compared to old code!
-    g_stats.answers(0);
-    g_stats.ourtime(0);
+    t_Counters.at(rec::Counter::answers)(0);
+    t_Counters.at(rec::Counter::ourtime)(0);
 #endif
   }
 
@@ -1977,14 +1982,15 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
     double delta = (diff.tv_sec * 1000 + diff.tv_usec / 1000.0);
 
     if (delta > 1000.0) {
-      g_stats.tooOldDrops++;
+      t_Counters.at(rec::Counter::tooOldDrops)++;
       return nullptr;
     }
   }
 
-  ++g_stats.qcounter;
+  ++t_Counters.at(rec::Counter::qcounter);
+
   if (fromaddr.sin4.sin_family == AF_INET6)
-    g_stats.ipv6qcounter++;
+    t_Counters.at(rec::Counter::ipv6qcounter)++;
 
   string response;
   const dnsheader_aligned headerdata(question.data());
@@ -2142,7 +2148,8 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
         struct timeval now;
         Utility::gettimeofday(&now, nullptr);
         uint64_t spentUsec = uSec(now - tv);
-        g_stats.cumulativeAnswers(spentUsec);
+        t_Counters.at(rec::Histogram::cumulativeAnswers)(spentUsec);
+        t_Counters.updateSnap(g_regressionTestMode);
         return 0;
       }
     }
@@ -2162,7 +2169,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
         SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED question from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " based on policy" << endl,
              g_slogudpin->info(Logr::Notice, "Dropped question based on policy", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
       }
-      g_stats.policyDrops++;
+      t_Counters.at(rec::Counter::policyDrops)++;
       return 0;
     }
   }
@@ -2174,7 +2181,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
              g_slogudpin->info(Logr::Notice, "Dropping UDP NOTIFY, zone not matched by allow-notify-for", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
       }
 
-      g_stats.zoneDisallowedNotify++;
+      t_Counters.at(rec::Counter::zoneDisallowedNotify)++;
       return 0;
     }
 
@@ -2197,7 +2204,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
       SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED question from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << ", over capacity" << endl,
            g_slogudpin->info(Logr::Notice, "Dropped question, over capacity", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
 
-    g_stats.overCapacityDrops++;
+    t_Counters.at(rec::Counter::overCapacityDrops)++;
     return 0;
   }
 
@@ -2270,7 +2277,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
       firstQuery = false;
 
       if (msgh.msg_flags & MSG_TRUNC) {
-        g_stats.truncatedDrops++;
+        t_Counters.at(rec::Counter::truncatedDrops)++;
         if (!g_quiet) {
           SLOG(g_log << Logger::Error << "Ignoring truncated query from " << fromaddr.toString() << endl,
                g_slogudpin->info(Logr::Error, "Ignoring truncated query", "remote", Logging::Loggable(fromaddr)));
@@ -2284,7 +2291,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
         bool tcp;
         ssize_t used = parseProxyHeader(data, proxyProto, source, destination, tcp, proxyProtocolValues);
         if (used <= 0) {
-          ++g_stats.proxyProtocolInvalidCount;
+          ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
           if (!g_quiet) {
             SLOG(g_log << Logger::Error << "Ignoring invalid proxy protocol (" << std::to_string(len) << ", " << std::to_string(used) << ") query from " << fromaddr.toStringWithPort() << endl,
                  g_slogudpin->info(Logr::Error, "Ignoring invalid proxy protocol query", "length", Logging::Loggable(len),
@@ -2298,7 +2305,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                  g_slogudpin->info(Logr::Error, "Proxy protocol header in UDP packet  is larger than proxy-protocol-maximum-size",
                                    "used", Logging::Loggable(used), "remote", Logging::Loggable(fromaddr)));
           }
-          ++g_stats.proxyProtocolInvalidCount;
+          ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
           return;
         }
 
@@ -2306,7 +2313,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
       }
       else if (len > 512) {
         /* we only allow UDP packets larger than 512 for those with a proxy protocol header */
-        g_stats.truncatedDrops++;
+        t_Counters.at(rec::Counter::truncatedDrops)++;
         if (!g_quiet) {
           SLOG(g_log << Logger::Error << "Ignoring truncated query from " << fromaddr.toStringWithPort() << endl,
                g_slogudpin->info(Logr::Error, "Ignoring truncated query", "remote", Logging::Loggable(fromaddr)));
@@ -2315,7 +2322,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
       }
 
       if (data.size() < sizeof(dnsheader)) {
-        g_stats.ignoredCount++;
+        t_Counters.at(rec::Counter::ignoredCount)++;
         if (!g_quiet) {
           SLOG(g_log << Logger::Error << "Ignoring too-short (" << std::to_string(data.size()) << ") query from " << fromaddr.toString() << endl,
                g_slogudpin->info(Logr::Error, "Ignoring too-short query", "length", Logging::Loggable(data.size()),
@@ -2344,7 +2351,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                g_slogudpin->info(Logr::Error, "Dropping UDP query, address not matched by allow-from", "source", Logging::Loggable(mappedSource)));
         }
 
-        g_stats.unauthorizedUDP++;
+        t_Counters.at(rec::Counter::unauthorizedUDP)++;
         return;
       }
 
@@ -2355,7 +2362,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                g_slogudpin->info(Logr::Error, "Dropping UDP query can't deal with port 0", "remote", Logging::Loggable(fromaddr)));
         }
 
-        g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
+        t_Counters.at(rec::Counter::clientParseError)++; // not quite the best place to put it, but needs to go somewhere
         return;
       }
 
@@ -2364,21 +2371,21 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
         const dnsheader* dh = headerdata.get();
 
         if (dh->qr) {
-          g_stats.ignoredCount++;
+          t_Counters.at(rec::Counter::ignoredCount)++;
           if (g_logCommonErrors) {
             SLOG(g_log << Logger::Error << "Ignoring answer from " << fromaddr.toString() << " on server socket!" << endl,
                  g_slogudpin->info(Logr::Error, "Ignoring answer on server socket", "remote", Logging::Loggable(fromaddr)));
           }
         }
         else if (dh->opcode != Opcode::Query && dh->opcode != Opcode::Notify) {
-          g_stats.ignoredCount++;
+          t_Counters.at(rec::Counter::ignoredCount)++;
           if (g_logCommonErrors) {
             SLOG(g_log << Logger::Error << "Ignoring unsupported opcode " << Opcode::to_s(dh->opcode) << " from " << fromaddr.toString() << " on server socket!" << endl,
                  g_slogudpin->info(Logr::Error, "Ignoring unsupported opcode server socket", "remote", Logging::Loggable(fromaddr), "opcode", Logging::Loggable(Opcode::to_s(dh->opcode))));
           }
         }
         else if (dh->qdcount == 0) {
-          g_stats.emptyQueriesCount++;
+          t_Counters.at(rec::Counter::emptyQueriesCount)++;
           if (g_logCommonErrors) {
             SLOG(g_log << Logger::Error << "Ignoring empty (qdcount == 0) query from " << fromaddr.toString() << " on server socket!" << endl,
                  g_slogudpin->info(Logr::Error, "Ignoring empty (qdcount == 0) query on server socket!", "remote", Logging::Loggable(fromaddr)));
@@ -2393,7 +2400,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                                        "source", Logging::Loggable(mappedSource)));
               }
 
-              g_stats.sourceDisallowedNotify++;
+              t_Counters.at(rec::Counter::sourceDisallowedNotify)++;
               return;
             }
           }
@@ -2435,14 +2442,14 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
         }
       }
       catch (const MOADNSException& mde) {
-        g_stats.clientParseError++;
+        t_Counters.at(rec::Counter::clientParseError)++;
         if (g_logCommonErrors) {
           SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP client " << fromaddr.toString() << ": " << mde.what() << endl,
                g_slogudpin->error(Logr::Error, mde.what(), "Unable to parse packet from remote UDP client", "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("MOADNSException")));
         }
       }
       catch (const std::runtime_error& e) {
-        g_stats.clientParseError++;
+        t_Counters.at(rec::Counter::clientParseError)++;
         if (g_logCommonErrors) {
           SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP client " << fromaddr.toString() << ": " << e.what() << endl,
                g_slogudpin->error(Logr::Error, e.what(), "Unable to parse packet from remote UDP client", "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("std::runtime_error")));
@@ -2452,12 +2459,13 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
     else {
       // cerr<<t_id<<" had error: "<<stringerror()<<endl;
       if (firstQuery && errno == EAGAIN) {
-        g_stats.noPacketError++;
+        t_Counters.at(rec::Counter::noPacketError)++;
       }
 
       break;
     }
   }
+  t_Counters.updateSnap(g_regressionTestMode);
 }
 
 void makeUDPServerSockets(deferredAdd_t& deferredAdds, Logr::log_t log)
@@ -2615,7 +2623,7 @@ static unsigned int selectWorker(unsigned int hash)
   unsigned int worker = hash % RecThreadInfo::numWorkers();
   /* at least one server has to be at or below the average load */
   if (load[worker] > targetLoad) {
-    ++g_stats.rebalancedQueries;
+    ++t_Counters.at(rec::Counter::rebalancedQueries);
     do {
       worker = (worker + 1) % RecThreadInfo::numWorkers();
     } while (load[worker] > targetLoad);
@@ -2637,7 +2645,7 @@ void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
   unsigned int hash = hashQuestion(reinterpret_cast<const uint8_t*>(packet.data()), packet.length(), g_disthashseed, ok);
   if (!ok) {
     // hashQuestion does detect invalid names, so we might as well punt here instead of in the worker thread
-    g_stats.ignoredCount++;
+    t_Counters.at(rec::Counter::ignoredCount)++;
     throw MOADNSException("too-short (" + std::to_string(packet.length()) + ") or invalid name");
   }
   unsigned int target = selectWorker(hash);
@@ -2655,7 +2663,7 @@ void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
     } while (newTarget == target);
 
     if (!trySendingQueryToWorker(newTarget, tmsg)) {
-      g_stats.queryPipeFullDrops++;
+      t_Counters.at(rec::Counter::queryPipeFullDrops)++;
       delete tmsg;
     }
   }
@@ -2675,7 +2683,7 @@ static void doResends(MT_t::waiters_t::iterator& iter, const std::shared_ptr<Pac
     r->fd = -1;
     r->id = *i;
     MT->sendEvent(r, &content);
-    g_stats.chainResends++;
+    t_Counters.at(rec::Counter::chainResends)++;
   }
 }
 
@@ -2694,7 +2702,7 @@ static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
     if (len < 0)
       ; //      cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
     else {
-      g_stats.serverParseError++;
+      t_Counters.at(rec::Counter::serverParseError)++;
       if (g_logCommonErrors)
         SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP server " << fromaddr.toString() << ": packet smaller than DNS header" << endl,
              g_slogout->info(Logr::Error, "Unable to parse packet from remote UDP server", "from", Logging::Loggable(fromaddr)));
@@ -2736,7 +2744,7 @@ static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
         pident->domain = DNSName(reinterpret_cast<const char*>(packet.data()), len, 12, false, &pident->type); // don't copy this from above - we need to do the actual read
     }
     catch (std::exception& e) {
-      g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
+      t_Counters.at(rec::Counter::serverParseError)++; // won't be fed to lwres.cc, so we have to increment
       SLOG(g_log << Logger::Warning << "Error in packet from remote nameserver " << fromaddr.toStringWithPort() << ": " << e.what() << endl,
            g_slogudpin->error(Logr::Warning, "Error in packet from remote nameserver", "from", Logging::Loggable(fromaddr)));
       return;
@@ -2770,7 +2778,7 @@ retryWithName:
         goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
       }
     }
-    g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
+    t_Counters.at(rec::Counter::unexpectedCount)++; // if we made it here, it really is an unexpected answer
     if (g_logCommonErrors) {
       SLOG(g_log << Logger::Warning << "Discarding unexpected packet from " << fromaddr.toStringWithPort() << ": " << (pident->domain.empty() ? "<empty>" : pident->domain.toString()) << ", " << pident->type << ", " << MT->d_waiters.size() << " waiters" << endl,
            g_slogudpin->info(Logr::Warning, "Discarding unexpected packet", "from", Logging::Loggable(fromaddr),
index 52c93bf4189f1a21fd667b241644464edf2c4c3b..cae909fd026071aa28f4fbfaf2962257e1a2d178 100644 (file)
@@ -1149,7 +1149,7 @@ static uint64_t doGetMallocated()
   return 0;
 }
 
-static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& histogram)
+static StatsMap toStatsMap(const string& name, const pdns::Histogram& histogram)
 {
   const auto& data = histogram.getCumulativeBuckets();
   const string pbasename = getPrometheusName(name);
@@ -1169,7 +1169,7 @@ static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& hist
   return entries;
 }
 
-static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& histogram4, const pdns::AtomicHistogram& histogram6)
+static StatsMap toStatsMap(const string& name, const pdns::Histogram& histogram4, const pdns::Histogram& histogram6)
 {
   const string pbasename = getPrometheusName(name);
   StatsMap entries;
@@ -1199,13 +1199,14 @@ static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& hist
   return entries;
 }
 
-static StatsMap toAuthRCodeStatsMap(const string& name, const std::array<pdns::stat_t, 16>& v)
+static StatsMap toAuthRCodeStatsMap(const string& name)
 {
   const string pbasename = getPrometheusName(name);
   StatsMap entries;
 
   uint8_t n = 0;
-  for (const auto& entry : v) {
+  auto rcodes = g_Counters.sum(rec::RCode::auth).rcodeCounters;
+  for (const auto& entry : rcodes) {
     const auto key = RCode::to_short_s(n);
     std::string pname = pbasename + "{rcode=\"" + key + "\"}";
     entries.emplace("auth-" + key + "-answers", StatsMapEntry{pname, std::to_string(entry)});
@@ -1313,9 +1314,9 @@ static StatsMap toRemoteLoggerStatsMap(const string& name)
 
 static void registerAllStats1()
 {
-  addGetStat("questions", &g_stats.qcounter);
-  addGetStat("ipv6-questions", &g_stats.ipv6qcounter);
-  addGetStat("tcp-questions", &g_stats.tcpqcounter);
+  addGetStat("questions", [] { return g_Counters.sum(rec::Counter::qcounter); });
+  addGetStat("ipv6-questions", [] { return g_Counters.sum(rec::Counter::ipv6qcounter); });
+  addGetStat("tcp-questions", [] { return g_Counters.sum(rec::Counter::tcpqcounter); });
 
   addGetStat("cache-hits", doGetCacheHits);
   addGetStat("cache-misses", doGetCacheMisses);
@@ -1339,63 +1340,63 @@ static void registerAllStats1()
 
   addGetStat("malloc-bytes", doGetMallocated);
 
-  addGetStat("servfail-answers", &g_stats.servFails);
-  addGetStat("nxdomain-answers", &g_stats.nxDomains);
-  addGetStat("noerror-answers", &g_stats.noErrors);
-
-  addGetStat("unauthorized-udp", &g_stats.unauthorizedUDP);
-  addGetStat("unauthorized-tcp", &g_stats.unauthorizedTCP);
-  addGetStat("source-disallowed-notify", &g_stats.sourceDisallowedNotify);
-  addGetStat("zone-disallowed-notify", &g_stats.zoneDisallowedNotify);
-  addGetStat("tcp-client-overflow", &g_stats.tcpClientOverflow);
-
-  addGetStat("client-parse-errors", &g_stats.clientParseError);
-  addGetStat("server-parse-errors", &g_stats.serverParseError);
-  addGetStat("too-old-drops", &g_stats.tooOldDrops);
-  addGetStat("truncated-drops", &g_stats.truncatedDrops);
-  addGetStat("query-pipe-full-drops", &g_stats.queryPipeFullDrops);
-
-  addGetStat("answers0-1", []() { return g_stats.answers.getCount(0); });
-  addGetStat("answers1-10", []() { return g_stats.answers.getCount(1); });
-  addGetStat("answers10-100", []() { return g_stats.answers.getCount(2); });
-  addGetStat("answers100-1000", []() { return g_stats.answers.getCount(3); });
-  addGetStat("answers-slow", []() { return g_stats.answers.getCount(4); });
-
-  addGetStat("x-ourtime0-1", []() { return g_stats.ourtime.getCount(0); });
-  addGetStat("x-ourtime1-2", []() { return g_stats.ourtime.getCount(1); });
-  addGetStat("x-ourtime2-4", []() { return g_stats.ourtime.getCount(2); });
-  addGetStat("x-ourtime4-8", []() { return g_stats.ourtime.getCount(3); });
-  addGetStat("x-ourtime8-16", []() { return g_stats.ourtime.getCount(4); });
-  addGetStat("x-ourtime16-32", []() { return g_stats.ourtime.getCount(5); });
-  addGetStat("x-ourtime-slow", []() { return g_stats.ourtime.getCount(6); });
-
-  addGetStat("auth4-answers0-1", []() { return g_stats.auth4Answers.getCount(0); });
-  addGetStat("auth4-answers1-10", []() { return g_stats.auth4Answers.getCount(1); });
-  addGetStat("auth4-answers10-100", []() { return g_stats.auth4Answers.getCount(2); });
-  addGetStat("auth4-answers100-1000", []() { return g_stats.auth4Answers.getCount(3); });
-  addGetStat("auth4-answers-slow", []() { return g_stats.auth4Answers.getCount(4); });
-
-  addGetStat("auth6-answers0-1", []() { return g_stats.auth6Answers.getCount(0); });
-  addGetStat("auth6-answers1-10", []() { return g_stats.auth6Answers.getCount(1); });
-  addGetStat("auth6-answers10-100", []() { return g_stats.auth6Answers.getCount(2); });
-  addGetStat("auth6-answers100-1000", []() { return g_stats.auth6Answers.getCount(3); });
-  addGetStat("auth6-answers-slow", []() { return g_stats.auth6Answers.getCount(4); });
-
-  addGetStat("qa-latency", []() { return round(g_stats.avgLatencyUsec.load()); });
-  addGetStat("x-our-latency", []() { return round(g_stats.avgLatencyOursUsec.load()); });
-  addGetStat("unexpected-packets", &g_stats.unexpectedCount);
-  addGetStat("case-mismatches", &g_stats.caseMismatchCount);
-  addGetStat("spoof-prevents", &g_stats.spoofCount);
-
-  addGetStat("nsset-invalidations", &g_stats.nsSetInvalidations);
-
-  addGetStat("resource-limits", &g_stats.resourceLimits);
-  addGetStat("over-capacity-drops", &g_stats.overCapacityDrops);
-  addGetStat("policy-drops", &g_stats.policyDrops);
-  addGetStat("no-packet-error", &g_stats.noPacketError);
-  addGetStat("ignored-packets", &g_stats.ignoredCount);
-  addGetStat("empty-queries", &g_stats.emptyQueriesCount);
-  addGetStat("max-mthread-stack", &g_stats.maxMThreadStackUsage);
+  addGetStat("servfail-answers", [] { return g_Counters.sum(rec::Counter::servFails); });
+  addGetStat("nxdomain-answers", [] { return g_Counters.sum(rec::Counter::nxDomains); });
+  addGetStat("noerror-answers", [] { return g_Counters.sum(rec::Counter::noErrors); });
+
+  addGetStat("unauthorized-udp", [] { return g_Counters.sum(rec::Counter::unauthorizedUDP); });
+  addGetStat("unauthorized-tcp", [] { return g_Counters.sum(rec::Counter::unauthorizedTCP); });
+  addGetStat("source-disallowed-notify", [] { return g_Counters.sum(rec::Counter::sourceDisallowedNotify); });
+  addGetStat("zone-disallowed-notify", [] { return g_Counters.sum(rec::Counter::zoneDisallowedNotify); });
+  addGetStat("tcp-client-overflow", [] { return g_Counters.sum(rec::Counter::tcpClientOverflow); });
+
+  addGetStat("client-parse-errors", [] { return g_Counters.sum(rec::Counter::clientParseError); });
+  addGetStat("server-parse-errors", [] { return g_Counters.sum(rec::Counter::serverParseError); });
+  addGetStat("too-old-drops", [] { return g_Counters.sum(rec::Counter::tooOldDrops); });
+  addGetStat("truncated-drops", [] { return g_Counters.sum(rec::Counter::truncatedDrops); });
+  addGetStat("query-pipe-full-drops", [] { return g_Counters.sum(rec::Counter::queryPipeFullDrops); });
+
+  addGetStat("answers0-1", []() { return g_Counters.sum(rec::Histogram::answers).getCount(0); });
+  addGetStat("answers1-10", []() { return g_Counters.sum(rec::Histogram::answers).getCount(1); });
+  addGetStat("answers10-100", []() { return g_Counters.sum(rec::Histogram::answers).getCount(2); });
+  addGetStat("answers100-1000", []() { return g_Counters.sum(rec::Histogram::answers).getCount(3); });
+  addGetStat("answers-slow", []() { return g_Counters.sum(rec::Histogram::answers).getCount(4); });
+
+  addGetStat("x-ourtime0-1", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(0); });
+  addGetStat("x-ourtime1-2", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(1); });
+  addGetStat("x-ourtime2-4", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(2); });
+  addGetStat("x-ourtime4-8", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(3); });
+  addGetStat("x-ourtime8-16", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(4); });
+  addGetStat("x-ourtime16-32", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(5); });
+  addGetStat("x-ourtime-slow", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(6); });
+
+  addGetStat("auth4-answers0-1", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(0); });
+  addGetStat("auth4-answers1-10", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(1); });
+  addGetStat("auth4-answers10-100", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(2); });
+  addGetStat("auth4-answers100-1000", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(3); });
+  addGetStat("auth4-answers-slow", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(4); });
+
+  addGetStat("auth6-answers0-1", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(0); });
+  addGetStat("auth6-answers1-10", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(1); });
+  addGetStat("auth6-answers10-100", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(2); });
+  addGetStat("auth6-answers100-1000", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(3); });
+  addGetStat("auth6-answers-slow", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(4); });
+
+  addGetStat("qa-latency", []() { return round(g_Counters.avg(rec::DoubleWAvgCounter::avgLatencyUsec)); });
+  addGetStat("x-our-latency", []() { return round(g_Counters.avg(rec::DoubleWAvgCounter::avgLatencyOursUsec)); });
+  addGetStat("unexpected-packets", [] { return g_Counters.sum(rec::Counter::unexpectedCount); });
+  addGetStat("case-mismatches", [] { return g_Counters.sum(rec::Counter::caseMismatchCount); });
+  addGetStat("spoof-prevents", [] { return g_Counters.sum(rec::Counter::spoofCount); });
+
+  addGetStat("nsset-invalidations", [] { return g_Counters.sum(rec::Counter::nsSetInvalidations); });
+
+  addGetStat("resource-limits", [] { return g_Counters.sum(rec::Counter::resourceLimits); });
+  addGetStat("over-capacity-drops", [] { return g_Counters.sum(rec::Counter::overCapacityDrops); });
+  addGetStat("policy-drops", [] { return g_Counters.sum(rec::Counter::policyDrops); });
+  addGetStat("no-packet-error", [] { return g_Counters.sum(rec::Counter::noPacketError); });
+  addGetStat("ignored-packets", [] { return g_Counters.sum(rec::Counter::ignoredCount); });
+  addGetStat("empty-queries", [] { return g_Counters.sum(rec::Counter::emptyQueriesCount); });
+  addGetStat("max-mthread-stack", [] { return g_Counters.max(rec::Counter::maxMThreadStackUsage); });
 
   addGetStat("negcache-entries", getNegCacheSize);
   addGetStat("throttle-entries", SyncRes::getThrottledServersSize);
@@ -1406,22 +1407,22 @@ static void registerAllStats1()
 
   addGetStat("concurrent-queries", getConcurrentQueries);
   addGetStat("security-status", &g_security_status);
-  addGetStat("outgoing-timeouts", &SyncRes::s_outgoingtimeouts);
-  addGetStat("outgoing4-timeouts", &SyncRes::s_outgoing4timeouts);
-  addGetStat("outgoing6-timeouts", &SyncRes::s_outgoing6timeouts);
-  addGetStat("auth-zone-queries", &SyncRes::s_authzonequeries);
-  addGetStat("tcp-outqueries", &SyncRes::s_tcpoutqueries);
-  addGetStat("dot-outqueries", &SyncRes::s_dotoutqueries);
-  addGetStat("all-outqueries", &SyncRes::s_outqueries);
-  addGetStat("ipv6-outqueries", &g_stats.ipv6queries);
-  addGetStat("throttled-outqueries", &SyncRes::s_throttledqueries);
-  addGetStat("dont-outqueries", &SyncRes::s_dontqueries);
-  addGetStat("qname-min-fallback-success", &SyncRes::s_qnameminfallbacksuccess);
-  addGetStat("throttled-out", &SyncRes::s_throttledqueries);
-  addGetStat("unreachables", &SyncRes::s_unreachables);
+  addGetStat("outgoing-timeouts", [] { return g_Counters.sum(rec::Counter::outgoingtimeouts); });
+  addGetStat("outgoing4-timeouts", [] { return g_Counters.sum(rec::Counter::outgoing4timeouts); });
+  addGetStat("outgoing6-timeouts", [] { return g_Counters.sum(rec::Counter::outgoing6timeouts); });
+  addGetStat("auth-zone-queries", [] { return g_Counters.sum(rec::Counter::authzonequeries); });
+  addGetStat("tcp-outqueries", [] { return g_Counters.sum(rec::Counter::tcpoutqueries); });
+  addGetStat("dot-outqueries", [] { return g_Counters.sum(rec::Counter::dotoutqueries); });
+  addGetStat("all-outqueries", [] { return g_Counters.sum(rec::Counter::outqueries); });
+  addGetStat("ipv6-outqueries", [] { return g_Counters.sum(rec::Counter::ipv6queries); });
+  addGetStat("throttled-outqueries", [] { return g_Counters.sum(rec::Counter::throttledqueries); });
+  addGetStat("dont-outqueries", [] { return g_Counters.sum(rec::Counter::dontqueries); });
+  addGetStat("qname-min-fallback-success", [] { return g_Counters.sum(rec::Counter::qnameminfallbacksuccess); });
+  addGetStat("throttled-out", [] { return g_Counters.sum(rec::Counter::throttledqueries); });
+  addGetStat("unreachables", [] { return g_Counters.sum(rec::Counter::unreachables); });
   addGetStat("ecs-queries", &SyncRes::s_ecsqueries);
   addGetStat("ecs-responses", &SyncRes::s_ecsresponses);
-  addGetStat("chain-resends", &g_stats.chainResends);
+  addGetStat("chain-resends", [] { return g_Counters.sum(rec::Counter::chainResends); });
   addGetStat("tcp-clients", [] { return TCPConnection::getCurrentConnections(); });
 
 #ifdef __linux__
@@ -1437,17 +1438,17 @@ static void registerAllStats1()
   addGetStat("udp6-in-csum-errors", [] { return udp6ErrorStats("udp6-in-csum-errors"); });
 #endif
 
-  addGetStat("edns-ping-matches", &g_stats.ednsPingMatches);
-  addGetStat("edns-ping-mismatches", &g_stats.ednsPingMismatches);
-  addGetStat("dnssec-queries", &g_stats.dnssecQueries);
+  addGetStat("edns-ping-matches", [] { return g_Counters.sum(rec::Counter::ednsPingMatches); });
+  addGetStat("edns-ping-mismatches", [] { return g_Counters.sum(rec::Counter::ednsPingMismatches); });
+  addGetStat("dnssec-queries", [] { return g_Counters.sum(rec::Counter::dnssecQueries); });
 
-  addGetStat("dnssec-authentic-data-queries", &g_stats.dnssecAuthenticDataQueries);
-  addGetStat("dnssec-check-disabled-queries", &g_stats.dnssecCheckDisabledQueries);
+  addGetStat("dnssec-authentic-data-queries", [] { return g_Counters.sum(rec::Counter::dnssecAuthenticDataQueries); });
+  addGetStat("dnssec-check-disabled-queries", [] { return g_Counters.sum(rec::Counter::dnssecCheckDisabledQueries); });
 
-  addGetStat("variable-responses", &g_stats.variableResponses);
+  addGetStat("variable-responses", [] { return g_Counters.sum(rec::Counter::variableResponses); });
 
-  addGetStat("noping-outqueries", &g_stats.noPingOutQueries);
-  addGetStat("noedns-outqueries", &g_stats.noEdnsOutQueries);
+  addGetStat("noping-outqueries", [] { return g_Counters.sum(rec::Counter::noPingOutQueries); });
+  addGetStat("noedns-outqueries", [] { return g_Counters.sum(rec::Counter::noEdnsOutQueries); });
 
   addGetStat("uptime", calculateUptime);
   addGetStat("real-memory-usage", [] { return getRealMemoryUsage(string()); });
@@ -1471,7 +1472,7 @@ static void registerAllStats1()
   addGetStat("memory-allocated", [] { return g_mtracer->getTotAllocated(string()); });
 #endif
 
-  addGetStat("dnssec-validations", &g_stats.dnssecValidations);
+  addGetStat("dnssec-validations", [] { return g_Counters.sum(rec::Counter::dnssecValidations); });
   addGetStat("dnssec-result-insecure", &g_stats.dnssecResults[vState::Insecure]);
   addGetStat("dnssec-result-secure", &g_stats.dnssecResults[vState::Secure]);
   addGetStat("dnssec-result-bogus", []() {
@@ -1538,17 +1539,17 @@ static void registerAllStats1()
   addGetStat("policy-result-truncate", &g_stats.policyResults[DNSFilterEngine::PolicyKind::Truncate]);
   addGetStat("policy-result-custom", &g_stats.policyResults[DNSFilterEngine::PolicyKind::Custom]);
 
-  addGetStat("rebalanced-queries", &g_stats.rebalancedQueries);
+  addGetStat("rebalanced-queries", [] { return g_Counters.sum(rec::Counter::rebalancedQueries); });
 
-  addGetStat("proxy-protocol-invalid", &g_stats.proxyProtocolInvalidCount);
+  addGetStat("proxy-protocol-invalid", [] { return g_Counters.sum(rec::Counter::proxyProtocolInvalidCount); });
 
-  addGetStat("nod-lookups-dropped-oversize", &g_stats.nodLookupsDroppedOversize);
+  addGetStat("nod-lookups-dropped-oversize", [] { return g_Counters.sum(rec::Counter::nodLookupsDroppedOversize); });
 
   addGetStat("taskqueue-pushed", []() { return getTaskPushes(); });
   addGetStat("taskqueue-expired", []() { return getTaskExpired(); });
   addGetStat("taskqueue-size", []() { return getTaskSize(); });
 
-  addGetStat("dns64-prefix-answers", &g_stats.dns64prefixanswers);
+  addGetStat("dns64-prefix-answers", [] { return g_Counters.sum(rec::Counter::dns64prefixanswers); });
 
   addGetStat("almost-expired-pushed", []() { return getAlmostExpiredTasksPushed(); });
   addGetStat("almost-expired-run", []() { return getAlmostExpiredTasksRun(); });
@@ -1556,8 +1557,8 @@ static void registerAllStats1()
 
   addGetStat("idle-tcpout-connections", getCurrentIdleTCPConnections);
 
-  addGetStat("maintenance-usec", &g_stats.maintenanceUsec);
-  addGetStat("maintenance-calls", &g_stats.maintenanceCalls);
+  addGetStat("maintenance-usec", [] { return g_Counters.sum(rec::Counter::maintenanceUsec); });
+  addGetStat("maintenance-calls", [] { return g_Counters.sum(rec::Counter::maintenanceCalls); });
 
   /* make sure that the ECS stats are properly initialized */
   SyncRes::clearECSStats();
@@ -1571,10 +1572,10 @@ static void registerAllStats1()
   }
 
   addGetStat("cumul-clientanswers", []() {
-    return toStatsMap(g_stats.cumulativeAnswers.getName(), g_stats.cumulativeAnswers);
+    return toStatsMap(t_Counters.at(rec::Histogram::cumulativeAnswers).getName(), g_Counters.sum(rec::Histogram::cumulativeAnswers));
   });
   addGetStat("cumul-authanswers", []() {
-    return toStatsMap(g_stats.cumulativeAuth4Answers.getName(), g_stats.cumulativeAuth4Answers, g_stats.cumulativeAuth6Answers);
+    return toStatsMap(t_Counters.at(rec::Histogram::cumulativeAuth4Answers).getName(), g_Counters.sum(rec::Histogram::cumulativeAuth4Answers), g_Counters.sum(rec::Histogram::cumulativeAuth6Answers));
   });
   addGetStat("policy-hits", []() {
     return toRPZStatsMap("policy-hits", g_stats.policyHits);
@@ -1583,7 +1584,7 @@ static void registerAllStats1()
     return toProxyMappingStatsMap("proxy-mapping-total");
   });
   addGetStat("auth-rcode-answers", []() {
-    return toAuthRCodeStatsMap("auth-rcode-answers", g_stats.authRCode);
+    return toAuthRCodeStatsMap("auth-rcode-answers");
   });
   addGetStat("remote-logger-count", []() {
     return toRemoteLoggerStatsMap("remote-logger-count");
index 629976e0d15aeb025b53172422ab19d2e81e8838..0371e08c3f22b74c632cab42c9d345659a326309 100644 (file)
@@ -171,6 +171,7 @@ pdns_recursor_SOURCES = \
        rec-protozero.cc rec-protozero.hh \
        rec-snmp.hh rec-snmp.cc \
        rec-taskqueue.cc rec-taskqueue.hh \
+       rec-tcounters.cc rec-tcounters.hh \
        rec-tcp.cc \
        rec-tcpout.cc rec-tcpout.hh \
        rec-zonetocache.cc rec-zonetocache.hh \
@@ -201,6 +202,7 @@ pdns_recursor_SOURCES = \
        svc-records.cc svc-records.hh \
        syncres.cc syncres.hh \
        taskqueue.cc taskqueue.hh \
+       tcounters.hh \
        tcpiohandler.cc tcpiohandler.hh \
        threadname.hh threadname.cc \
        tsigverifier.cc tsigverifier.hh \
@@ -294,6 +296,7 @@ testrunner_SOURCES = \
        rcpgenerator.cc \
        rec-eventtrace.cc rec-eventtrace.hh \
        rec-taskqueue.cc rec-taskqueue.hh \
+       rec-tcounters.cc rec-tcounters.hh \
        rec-zonetocache.cc rec-zonetocache.hh \
        recpacketcache.cc recpacketcache.hh \
        recursor_cache.cc recursor_cache.hh \
@@ -334,6 +337,7 @@ testrunner_SOURCES = \
        test-packetcache_hh.cc \
        test-rcpgenerator_cc.cc \
        test-rec-taskqueue.cc \
+       test-rec-tcounters_cc.cc \
        test-rec-zonetocache.cc \
        test-recpacketcache_cc.cc \
        test-recursorcache_cc.cc \
index 415b7c83386a4067e385046856fce8c189823d45..a74515ff5783db076c811093243a5caa1844872b 100644 (file)
@@ -62,6 +62,7 @@ thread_local FrameStreamServersInfo t_nodFrameStreamServersInfo;
 string g_programname = "pdns_recursor";
 string g_pidfname;
 RecursorControlChannel g_rcc; // only active in the handler thread
+bool g_regressionTestMode;
 
 #ifdef NOD_ENABLED
 bool g_nodEnabled;
@@ -997,9 +998,16 @@ static void doStats(void)
 
   auto log = g_slog->withName("stats");
 
-  if (g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
+  auto qcounter = g_Counters.sum(rec::Counter::qcounter);
+  auto syncresqueries = g_Counters.sum(rec::Counter::syncresqueries);
+  auto outqueries = g_Counters.sum(rec::Counter::outqueries);
+  auto throttledqueries = g_Counters.sum(rec::Counter::throttledqueries);
+  auto tcpoutqueries = g_Counters.sum(rec::Counter::tcpoutqueries);
+  auto dotoutqueries = g_Counters.sum(rec::Counter::dotoutqueries);
+  auto outgoingtimeouts = g_Counters.sum(rec::Counter::outgoingtimeouts);
+  if (qcounter > 0 && (cacheHits + cacheMisses) > 0 && syncresqueries > 0 && outqueries > 0) {
     if (!g_slogStructured) {
-      g_log << Logger::Notice << "stats: " << g_stats.qcounter << " questions, " << cacheSize << " cache entries, " << negCacheSize << " negative entries, " << ratePercentage(cacheHits, cacheHits + cacheMisses) << "% cache hits" << endl;
+      g_log << Logger::Notice << "stats: " << qcounter << " questions, " << cacheSize << " cache entries, " << negCacheSize << " negative entries, " << ratePercentage(cacheHits, cacheHits + cacheMisses) << "% cache hits" << endl;
       g_log << Logger::Notice << "stats: cache contended/acquired " << rc_stats.first << '/' << rc_stats.second << " = " << r << '%' << endl;
 
       g_log << Logger::Notice << "stats: throttle map: "
@@ -1010,18 +1018,18 @@ static void doStats(void)
             << SyncRes::getNonResolvingNSSize() << ", saved-parentsets: "
             << SyncRes::getSaveParentsNSSetsSize()
             << endl;
-      g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries) << "%";
-      g_log << Logger::Notice << ", " << ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries) << "% throttled" << endl;
-      g_log << Logger::Notice << "stats: " << SyncRes::s_tcpoutqueries << "/" << SyncRes::s_dotoutqueries << "/" << getCurrentIdleTCPConnections() << " outgoing tcp/dot/idle connections, " << broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries) << " queries running, " << SyncRes::s_outgoingtimeouts << " outgoing timeouts " << endl;
+      g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(outqueries, syncresqueries) << "%";
+      g_log << Logger::Notice << ", " << ratePercentage(throttledqueries, outqueries + throttledqueries) << "% throttled" << endl;
+      g_log << Logger::Notice << "stats: " << tcpoutqueries << "/" << dotoutqueries << "/" << getCurrentIdleTCPConnections() << " outgoing tcp/dot/idle connections, " << broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries) << " queries running, " << outgoingtimeouts << " outgoing timeouts " << endl;
 
-      g_log << Logger::Notice << "stats: " << pcSize << " packet cache entries, " << ratePercentage(pcHits, g_stats.qcounter) << "% packet cache hits" << endl;
+      g_log << Logger::Notice << "stats: " << pcSize << " packet cache entries, " << ratePercentage(pcHits, qcounter) << "% packet cache hits" << endl;
 
       g_log << Logger::Notice << "stats: tasks pushed/expired/queuesize: " << taskPushes << '/' << taskExpired << '/' << taskSize << endl;
     }
     else {
       const string m = "Periodic statistics report";
       log->info(Logr::Info, m,
-                "questions", Logging::Loggable(g_stats.qcounter),
+                "questions", Logging::Loggable(qcounter),
                 "cache-entries", Logging::Loggable(cacheSize),
                 "negcache-entries", Logging::Loggable(negCacheSize),
                 "record-cache-hitratio-perc", Logging::Loggable(ratePercentage(cacheHits, cacheHits + cacheMisses)),
@@ -1035,17 +1043,17 @@ static void doStats(void)
                 "edns-entries", Logging::Loggable(SyncRes::getEDNSStatusesSize()),
                 "non-resolving-nameserver-entries", Logging::Loggable(SyncRes::getNonResolvingNSSize()),
                 "saved-parent-ns-sets-entries", Logging::Loggable(SyncRes::getSaveParentsNSSetsSize()),
-                "outqueries-per-query", Logging::Loggable(ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries)));
+                "outqueries-per-query", Logging::Loggable(ratePercentage(outqueries, syncresqueries)));
       log->info(Logr::Info, m,
-                "throttled-queries-perc", Logging::Loggable(ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries)),
-                "tcp-outqueries", Logging::Loggable(SyncRes::s_tcpoutqueries),
-                "dot-outqueries", Logging::Loggable(SyncRes::s_dotoutqueries),
+                "throttled-queries-perc", Logging::Loggable(ratePercentage(throttledqueries, outqueries + throttledqueries)),
+                "tcp-outqueries", Logging::Loggable(tcpoutqueries),
+                "dot-outqueries", Logging::Loggable(dotoutqueries),
                 "idle-tcpout-connections", Logging::Loggable(getCurrentIdleTCPConnections()),
                 "concurrent-queries", Logging::Loggable(broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)),
-                "outgoing-timeouts", Logging::Loggable(SyncRes::s_outgoingtimeouts));
+                "outgoing-timeouts", Logging::Loggable(outgoingtimeouts));
       log->info(Logr::Info, m,
                 "packetcache-entries", Logging::Loggable(pcSize),
-                "packetcache-hitratio-perc", Logging::Loggable(ratePercentage(pcHits, g_stats.qcounter)),
+                "packetcache-hitratio-perc", Logging::Loggable(ratePercentage(pcHits, qcounter)),
                 "taskqueue-pushed", Logging::Loggable(taskPushes),
                 "taskqueue-expired", Logging::Loggable(taskExpired),
                 "taskqueue-size", Logging::Loggable(taskSize));
@@ -1060,12 +1068,12 @@ static void doStats(void)
     }
     time_t now = time(0);
     if (lastOutputTime && lastQueryCount && now != lastOutputTime) {
-      SLOG(g_log << Logger::Notice << "stats: " << (g_stats.qcounter - lastQueryCount) / (now - lastOutputTime) << " qps (average over " << (now - lastOutputTime) << " seconds)" << endl,
-           log->info(Logr::Info, "Periodic QPS report", "qps", Logging::Loggable((g_stats.qcounter - lastQueryCount) / (now - lastOutputTime)),
+      SLOG(g_log << Logger::Notice << "stats: " << (qcounter - lastQueryCount) / (now - lastOutputTime) << " qps (average over " << (now - lastOutputTime) << " seconds)" << endl,
+           log->info(Logr::Info, "Periodic QPS report", "qps", Logging::Loggable((qcounter - lastQueryCount) / (now - lastOutputTime)),
                      "averagedOver", Logging::Loggable(now - lastOutputTime)));
     }
     lastOutputTime = now;
-    lastQueryCount = g_stats.qcounter;
+    lastQueryCount = qcounter;
   }
   else if (statsWanted) {
     SLOG(g_log << Logger::Notice << "stats: no stats yet!" << endl,
@@ -1354,6 +1362,7 @@ static int serviceMain(int argc, char* argv[], Logr::log_t log)
   g_log.setName(g_programname);
   g_log.disableSyslog(::arg().mustDo("disable-syslog"));
   g_log.setTimestamps(::arg().mustDo("log-timestamp"));
+  g_regressionTestMode = ::arg().mustDo("devonly-regression-test-mode");
 
   if (!::arg()["logging-facility"].empty()) {
     int val = logFacilityToLOG(::arg().asNum("logging-facility"));
@@ -1950,6 +1959,9 @@ static int serviceMain(int argc, char* argv[], Logr::log_t log)
   disableStats(StatComponent::RecControl, ::arg()["stats-rec-control-disabled-list"]);
   disableStats(StatComponent::SNMP, ::arg()["stats-snmp-disabled-list"]);
 
+  // Run before any thread doing stats related things
+  registerAllStats();
+
   if (::arg().mustDo("snmp-agent")) {
 #ifdef HAVE_NET_SNMP
     string setting = ::arg()["snmp-daemon-socket"];
@@ -2120,6 +2132,7 @@ static void houseKeeping(void*)
 
     struct timeval now;
     Utility::gettimeofday(&now);
+    t_Counters.updateSnap(now, g_regressionTestMode);
 
     // Below are the tasks that run for every recursorThread, including handler and taskThread
     if (t_packetCache) {
@@ -2225,7 +2238,10 @@ static void houseKeeping(void*)
       const unsigned int minRootRefreshInterval = 10;
       static PeriodicTask rootUpdateTask{"rootUpdateTask", std::max(SyncRes::s_maxcachettl * 8 / 10, minRootRefreshInterval)};
       rootUpdateTask.runIfDue(now, [now, &log, minRootRefreshInterval]() {
-        int res = SyncRes::getRootNS(now, nullptr, 0, log);
+        int res = 0;
+        if (!g_regressionTestMode) {
+          res = SyncRes::getRootNS(now, nullptr, 0, log);
+        }
         if (res == 0) {
           // Success, go back to the defaut period
           rootUpdateTask.setPeriod(std::max(SyncRes::s_maxcachettl * 8 / 10, minRootRefreshInterval));
@@ -2292,6 +2308,7 @@ static void houseKeeping(void*)
         }
       });
     }
+    t_Counters.updateSnap(g_regressionTestMode);
     t_running = false;
   }
   catch (const PDNSException& ae) {
@@ -2311,6 +2328,7 @@ static void houseKeeping(void*)
 static void recursorThread()
 {
   auto log = g_slog->withName("runtime");
+  t_Counters.updateSnap(true);
   try {
     auto& threadInfo = RecThreadInfo::self();
     {
@@ -2443,8 +2461,6 @@ static void recursorThread()
       }
     }
 
-    registerAllStats();
-
     if (threadInfo.isHandler()) {
       t_fdm->addReadFD(g_rcc.d_fd, handleRCC); // control channel
     }
@@ -2457,7 +2473,6 @@ static void recursorThread()
     time_t last_carbon = 0, last_lua_maintenance = 0;
     time_t carbonInterval = ::arg().asNum("carbon-interval");
     time_t luaMaintenanceInterval = ::arg().asNum("lua-maintenance-interval");
-    s_counter.store(0); // used to periodically execute certain tasks
 
 #ifdef HAVE_SYSTEMD
     if (threadInfo.isHandler()) {
@@ -2467,7 +2482,6 @@ static void recursorThread()
       sd_notify(0, "READY=1");
     }
 #endif
-
     while (!RecursorControlChannel::stop) {
       while (MT->schedule(&g_now))
         ; // MTasker letting the mthreads do their thing
@@ -2481,8 +2495,8 @@ static void recursorThread()
         if (!threadInfo.isTaskThread()) {
           struct timeval stop;
           Utility::gettimeofday(&stop);
-          g_stats.maintenanceUsec += uSec(stop - start);
-          ++g_stats.maintenanceCalls;
+          t_Counters.at(rec::Counter::maintenanceUsec) += uSec(stop - start);
+          ++t_Counters.at(rec::Counter::maintenanceCalls);
         }
       }
 
@@ -2526,8 +2540,8 @@ static void recursorThread()
             last_lua_maintenance = g_now.tv_sec;
             struct timeval stop;
             Utility::gettimeofday(&stop);
-            g_stats.maintenanceUsec += uSec(stop - start);
-            ++g_stats.maintenanceCalls;
+            t_Counters.at(rec::Counter::maintenanceUsec) += uSec(stop - start);
+            ++t_Counters.at(rec::Counter::maintenanceCalls);
           }
         }
       }
@@ -2587,6 +2601,8 @@ int main(int argc, char** argv)
 #else
     ::arg().set("stack-size", "stack size per mthread") = "200000";
 #endif
+    // This mode forces metrics snap updates and dsiable root-refresh, to get consistent counters
+    ::arg().setSwitch("devonly-regression-test-mode", "internal use only") = "no";
     ::arg().set("soa-minimum-ttl", "Don't change") = "0";
     ::arg().set("no-shuffle", "Don't change") = "off";
     ::arg().set("local-port", "port to listen on") = "53";
index 6c231f7fbe783661b37fe02df2a069e457952c1c..cb3787a8aea11fa7340ef2f2a1eff49d70e065ad 100644 (file)
@@ -154,6 +154,7 @@ struct DNSComboWriter
 extern thread_local unique_ptr<FDMultiplexer> t_fdm;
 extern uint16_t g_minUdpSourcePort;
 extern uint16_t g_maxUdpSourcePort;
+extern bool g_regressionTestMode;
 
 // you can ask this class for a UDP socket to send a query from
 // this socket is not yours, don't even think about deleting it
diff --git a/pdns/recursordist/rec-tcounters.cc b/pdns/recursordist/rec-tcounters.cc
new file mode 100644 (file)
index 0000000..b40815a
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "rec-tcounters.hh"
+
+#include <sstream>
+
+namespace rec
+{
+
+Counters& Counters::merge(const Counters& data)
+{
+  // Counters are simply added
+  for (size_t i = 0; i < uint64Count.size(); i++) {
+    uint64Count.at(i) += data.uint64Count.at(i);
+  }
+  // Averages: take weight into account
+  for (size_t i = 0; i < doubleWAvg.size(); i++) {
+    auto& lhs = doubleWAvg.at(i);
+    const auto& rhs = data.doubleWAvg.at(i);
+    auto weight = lhs.weight + rhs.weight;
+    auto avg = lhs.avg * static_cast<double>(lhs.weight) + rhs.avg * static_cast<double>(rhs.weight);
+    avg = weight == 0 ? 0 : avg / weight;
+    lhs.avg = avg;
+    lhs.weight = weight;
+  }
+  // Rcode Counters are simply added
+  for (size_t i = 0; i < auth.rcodeCounters.size(); i++) {
+    auth.rcodeCounters.at(i) += data.auth.rcodeCounters.at(i);
+  }
+  // Histograms counts are added by += operator on Histograms
+  for (size_t i = 0; i < histograms.size(); i++) {
+    histograms.at(i) += data.histograms.at(i);
+  }
+
+  return *this;
+}
+
+std::string Counters::toString() const
+{
+  std::ostringstream stream;
+
+  for (auto element : uint64Count) {
+    stream << element << ' ';
+  }
+  stream << std::endl;
+  for (auto element : doubleWAvg) {
+    stream << '(' << element.avg << ' ' << element.weight << ')';
+  }
+  stream << " RCodes: ";
+  for (auto element : auth.rcodeCounters) {
+    stream << element << ' ';
+  }
+  stream << "Histograms: ";
+  for (const auto& element : histograms) {
+    stream << element.getName() << ": NYI";
+  }
+  stream << std::endl;
+  return stream.str();
+}
+
+}
+
+// Compile with:
+// c++ -DTEST_TCOUNTER_TIMING -Wall -std=c++17 -O2 rec-tcounters.cc -pthread
+
+#if TEST_TCOUNTER_TIMING
+
+#include <iostream>
+#include <vector>
+#include <atomic>
+#include <thread>
+#include <ctime>
+
+rec::GlobalCounters g_counters;
+thread_local rec::TCounters t_counters(g_counters);
+
+std::atomic<uint64_t> atomicCounter;
+
+size_t iterations;
+
+void atomicThread()
+{
+  for (size_t i = 0; i < iterations; i++) {
+    ++atomicCounter;
+  }
+}
+
+void tcounterThread()
+{
+  for (size_t i = 0; i < iterations; i++) {
+    ++t_counters.at(rec::Counter::qcounter);
+    if (i % 100 == 0) {
+      t_counters.updateSnap();
+    }
+  }
+}
+
+int main(int argc, char* argv[])
+{
+  size_t threads = std::atoi(argv[1]);
+  iterations = std::atoi(argv[2]);
+
+  std::cout << "Starting " << threads << " threads doing " << iterations << " iterations using atomics" << std::endl;
+  std::vector<std::thread> thr;
+  thr.resize(threads);
+
+  timeval start;
+  gettimeofday(&start, nullptr);
+  for (size_t i = 0; i < threads; i++) {
+    thr[i] = std::thread(atomicThread);
+  }
+  for (size_t i = 0; i < threads; i++) {
+    thr[i].join();
+  }
+  timeval stop;
+  gettimeofday(&stop, nullptr);
+  timeval diff;
+  timersub(&stop, &start, &diff);
+  auto elapsed = (diff.tv_sec + diff.tv_usec / 1e6);
+  std::cout << "Sum is " << atomicCounter << " elapsed is " << elapsed << std::endl;
+
+  std::cout << "Now doing the same with tcounters" << std::endl;
+  gettimeofday(&start, nullptr);
+  for (size_t i = 0; i < threads; i++) {
+    thr[i] = std::thread(tcounterThread);
+  }
+  for (size_t i = 0; i < threads; i++) {
+    thr[i].join();
+  }
+  gettimeofday(&stop, nullptr);
+  timersub(&stop, &start, &diff);
+  elapsed = (diff.tv_sec + diff.tv_usec / 1e6);
+  std::cout << "Sum is " << g_counters.sum(rec::Counter::qcounter) << " elapsed is " << elapsed << std::endl;
+}
+
+#endif
diff --git a/pdns/recursordist/rec-tcounters.hh b/pdns/recursordist/rec-tcounters.hh
new file mode 100644 (file)
index 0000000..d315192
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#pragma once
+
+#include "tcounters.hh"
+
+#include <string>
+
+#include "histogram.hh"
+
+namespace rec
+{
+
+// Simple counters
+enum class Counter : uint8_t
+{
+  syncresqueries,
+  outgoingtimeouts,
+  outgoing4timeouts,
+  outgoing6timeouts,
+  throttledqueries,
+  dontqueries,
+  qnameminfallbacksuccess,
+  authzonequeries,
+  outqueries,
+  tcpoutqueries,
+  dotoutqueries,
+  unreachables,
+  servFails,
+  nxDomains,
+  noErrors,
+  qcounter,
+  ipv6qcounter,
+  tcpqcounter,
+  unauthorizedUDP, // when this is increased, qcounter isn't
+  unauthorizedTCP, // when this is increased, qcounter isn't
+  sourceDisallowedNotify, // when this is increased, qcounter is also
+  zoneDisallowedNotify, // when this is increased, qcounter is also
+  policyDrops,
+  tcpClientOverflow,
+  clientParseError,
+  serverParseError,
+  tooOldDrops,
+  truncatedDrops,
+  queryPipeFullDrops,
+  unexpectedCount,
+  caseMismatchCount,
+  spoofCount,
+  resourceLimits,
+  overCapacityDrops,
+  ipv6queries,
+  chainResends,
+  nsSetInvalidations,
+  ednsPingMatches,
+  ednsPingMismatches,
+  noPingOutQueries,
+  noEdnsOutQueries,
+  packetCacheHits,
+  noPacketError,
+  ignoredCount,
+  emptyQueriesCount,
+  dnssecQueries,
+  dnssecAuthenticDataQueries,
+  dnssecCheckDisabledQueries,
+  variableResponses,
+  maxMThreadStackUsage,
+  dnssecValidations, // should be the sum of all dnssecResult* stats
+  rebalancedQueries,
+  proxyProtocolInvalidCount,
+  nodLookupsDroppedOversize,
+  dns64prefixanswers,
+  maintenanceUsec,
+  maintenanceCalls,
+
+  numberOfCounters
+};
+
+// double avegares times, weighted according to how many packets they processed
+enum class DoubleWAvgCounter : uint8_t
+{
+  avgLatencyUsec,
+  avgLatencyOursUsec,
+  numberOfCounters
+};
+
+// An RCode histogram
+enum class RCode : uint8_t
+{
+  auth,
+  numberOfCounters
+};
+
+// A few other histograms
+enum class Histogram : uint8_t
+{
+  answers,
+  auth4Answers,
+  auth6Answers,
+  ourtime,
+  cumulativeAnswers,
+  cumulativeAuth4Answers,
+  cumulativeAuth6Answers,
+
+  numberOfCounters
+};
+
+struct Counters
+{
+  // An aray of simple counters
+  std::array<uint64_t, static_cast<size_t>(Counter::numberOfCounters)> uint64Count{};
+
+  struct WeightedAverage
+  {
+    double avg{};
+    uint64_t weight{};
+
+    void add(double value)
+    {
+      avg = value;
+      ++weight;
+    }
+
+    void addToRollingAvg(double value, uint64_t rollsize)
+    {
+      add((1.0 - 1.0 / static_cast<double>(rollsize)) * avg + value / static_cast<double>(rollsize));
+    }
+  };
+  // And an array of weighted averaged values
+  std::array<WeightedAverage, static_cast<size_t>(DoubleWAvgCounter::numberOfCounters)> doubleWAvg{};
+
+  struct RCodeCounters
+  {
+    RCodeCounters& operator+=(const RCodeCounters& rhs)
+    {
+      for (size_t i = 0; i < rcodeCounters.size(); i++) {
+        rcodeCounters.at(i) += rhs.rcodeCounters.at(i);
+      }
+      return *this;
+    }
+    static const size_t numberoOfRCodes = 16;
+    std::array<uint64_t, numberoOfRCodes> rcodeCounters;
+  };
+  // An RCodes histogram
+  RCodeCounters auth{};
+
+  std::array<pdns::Histogram, static_cast<size_t>(Histogram::numberOfCounters)> histograms = {
+    pdns::Histogram{"answers", {1000, 10000, 100000, 1000000}},
+    pdns::Histogram{"auth4answers", {1000, 10000, 100000, 1000000}},
+    pdns::Histogram{"auth6answers", {1000, 10000, 100000, 1000000}},
+    pdns::Histogram{"ourtime", {1000, 2000, 4000, 8000, 16000, 32000}},
+    pdns::Histogram{"cumul-clientanswers-", 10, 19},
+    pdns::Histogram{"cumul-authanswers-", 1000, 13},
+    pdns::Histogram{"cumul-authanswers-", 1000, 13}};
+
+  Counters()
+  {
+    for (auto& elem : uint64Count) {
+      elem = 0;
+    }
+    // doubleWAvg has a default ct that initializes
+    for (auto& elem : auth.rcodeCounters) {
+      elem = 0;
+    }
+  }
+
+  // Merge a set of counters into an existing set of counters. For simple counters, that will be additions
+  // for averages, we should take the weights into account. Histograms need to sum all individual counts.
+  Counters& merge(const Counters& data);
+
+  // The following accessors select the rightcounter type based on the index type
+  uint64_t& at(Counter index)
+  {
+    return uint64Count.at(static_cast<size_t>(index));
+  }
+
+  WeightedAverage& at(DoubleWAvgCounter index)
+  {
+    return doubleWAvg.at(static_cast<size_t>(index));
+  }
+
+  RCodeCounters& at(RCode index)
+  {
+    // We only have a single RCode indexed Histogram, so no need to select a specific one
+    return auth;
+  }
+
+  pdns::Histogram& at(Histogram index)
+  {
+    return histograms.at(static_cast<size_t>(index));
+  }
+
+  // Mainly for debugging purposes
+  [[nodiscard]] std::string toString() const;
+};
+
+// The application specific types, one for thread local, one for the aggregator
+using TCounters = pdns::TLocalCounters<Counters>;
+using GlobalCounters = pdns::GlobalCounters<Counters>;
+}
index e1bc143b6e3d76dc2c70196e82e687e1d3a0e9ce..a0b1972e06b1a3ebd52d3ee2f08a2d7bdad471dd 100644 (file)
@@ -229,7 +229,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
         SLOG(g_log << Logger::Error << "Unable to consume proxy protocol header in packet from TCP client " << conn->d_remote.toStringWithPort() << endl,
              g_slogtcpin->info(Logr::Error, "Unable to consume proxy protocol header in packet from TCP client", "remote", Logging::Loggable(conn->d_remote)));
       }
-      ++g_stats.proxyProtocolInvalidCount;
+      ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
       return;
     }
     else if (remaining < 0) {
@@ -250,7 +250,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
           SLOG(g_log << Logger::Error << "Unable to parse proxy protocol header in packet from TCP client " << conn->d_remote.toStringWithPort() << endl,
                g_slogtcpin->info(Logr::Error, "Unable to parse proxy protocol header in packet from TCP client", "remote", Logging::Loggable(conn->d_remote)));
         }
-        ++g_stats.proxyProtocolInvalidCount;
+        ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
         return;
       }
       else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
@@ -258,7 +258,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
           SLOG(g_log << Logger::Error << "Proxy protocol header in packet from TCP client " << conn->d_remote.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping" << endl,
                g_slogtcpin->info(Logr::Error, "Proxy protocol header in packet from TCP client is larger than proxy-protocol-maximum-size", "remote", Logging::Loggable(conn->d_remote), "size", Logging::Loggable(used)));
         }
-        ++g_stats.proxyProtocolInvalidCount;
+        ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
         return;
       }
 
@@ -278,7 +278,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                g_slogtcpin->info(Logr::Error, "Dropping TCP query, address not matched by allow-from", "remote", Logging::Loggable(conn->d_remote)));
         }
 
-        ++g_stats.unauthorizedTCP;
+        ++t_Counters.at(rec::Counter::unauthorizedTCP);
         return;
       }
 
@@ -348,7 +348,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
         dc = std::make_unique<DNSComboWriter>(conn->data, g_now, t_pdl);
       }
       catch (const MOADNSException& mde) {
-        g_stats.clientParseError++;
+        t_Counters.at(rec::Counter::clientParseError)++;
         if (g_logCommonErrors) {
           SLOG(g_log << Logger::Error << "Unable to parse packet from TCP client " << conn->d_remote.toStringWithPort() << endl,
                g_slogtcpin->info(Logr::Error, "Unable to parse packet from TCP client", "remte", Logging::Loggable(conn->d_remote)));
@@ -471,13 +471,13 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
             SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED TCP question from " << dc->d_source.toStringWithPort() << (dc->d_source != dc->d_remote ? " (via " + dc->d_remote.toStringWithPort() + ")" : "") << " based on policy" << endl,
                  g_slogtcpin->info(Logr::Info, "Dropped TCP question based on policy", "remote", Logging::Loggable(conn->d_remote), "source", Logging::Loggable(dc->d_source)));
           }
-          g_stats.policyDrops++;
+          t_Counters.at(rec::Counter::policyDrops)++;
           return;
         }
       }
 
       if (dc->d_mdp.d_header.qr) {
-        g_stats.ignoredCount++;
+        t_Counters.at(rec::Counter::ignoredCount)++;
         if (g_logCommonErrors) {
           SLOG(g_log << Logger::Error << "Ignoring answer from TCP client " << dc->getRemote() << " on server socket!" << endl,
                g_slogtcpin->info(Logr::Error, "Ignoring answer from TCP client on server socket", "remote", Logging::Loggable(dc->getRemote())));
@@ -485,7 +485,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
         return;
       }
       if (dc->d_mdp.d_header.opcode != Opcode::Query && dc->d_mdp.d_header.opcode != Opcode::Notify) {
-        g_stats.ignoredCount++;
+        t_Counters.at(rec::Counter::ignoredCount)++;
         if (g_logCommonErrors) {
           SLOG(g_log << Logger::Error << "Ignoring unsupported opcode " << Opcode::to_s(dc->d_mdp.d_header.opcode) << " from TCP client " << dc->getRemote() << " on server socket!" << endl,
                g_slogtcpin->info(Logr::Error, "Ignoring unsupported opcode from TCP client", "remote", Logging::Loggable(dc->getRemote()), "opcode", Logging::Loggable(Opcode::to_s(dc->d_mdp.d_header.opcode))));
@@ -495,7 +495,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
         return;
       }
       else if (dh->qdcount == 0) {
-        g_stats.emptyQueriesCount++;
+        t_Counters.at(rec::Counter::emptyQueriesCount)++;
         if (g_logCommonErrors) {
           SLOG(g_log << Logger::Error << "Ignoring empty (qdcount == 0) query from " << dc->getRemote() << " on server socket!" << endl,
                g_slogtcpin->info(Logr::Error, "Ignoring empty (qdcount == 0) query on server socket", "remote", Logging::Loggable(dc->getRemote())));
@@ -506,8 +506,9 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
       }
       else {
         // We have read a proper query
-        ++g_stats.qcounter;
-        ++g_stats.tcpqcounter;
+        //++t_Counters.at(rec::Counter::qcounter);
+        ++t_Counters.at(rec::Counter::qcounter);
+        ++t_Counters.at(rec::Counter::tcpqcounter);
 
         if (dc->d_mdp.d_header.opcode == Opcode::Notify) {
           if (!t_allowNotifyFrom || !t_allowNotifyFrom->match(dc->d_mappedSource)) {
@@ -516,7 +517,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                    g_slogtcpin->info(Logr::Error, "Dropping TCP NOTIFY, address not matched by allow-notify-from", "source", Logging::Loggable(dc->d_mappedSource)));
             }
 
-            g_stats.sourceDisallowedNotify++;
+            t_Counters.at(rec::Counter::sourceDisallowedNotify)++;
             return;
           }
 
@@ -526,7 +527,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                    g_slogtcpin->info(Logr::Error, "Dropping TCP NOTIFY,  zone not matched by allow-notify-for", "source", Logging::Loggable(dc->d_mappedSource), "zone", Logging::Loggable(qname)));
             }
 
-            g_stats.zoneDisallowedNotify++;
+            t_Counters.at(rec::Counter::zoneDisallowedNotify)++;
             return;
           }
         }
@@ -555,7 +556,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
             struct timeval now;
             Utility::gettimeofday(&now, nullptr);
             uint64_t spentUsec = uSec(now - start);
-            g_stats.cumulativeAnswers(spentUsec);
+            t_Counters.at(rec::Histogram::cumulativeAnswers)(spentUsec);
             dc->d_eventTrace.add(RecEventTrace::AnswerSent);
 
             if (t_protobufServers.servers && dc->d_logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbData && !pbData->d_tagged)) {
@@ -571,6 +572,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                    g_slogtcpin->info(Logr::Info, dc->d_eventTrace.toString())); // More fancy?
             }
             tcpGuard.keep();
+            t_Counters.updateSnap(g_regressionTestMode);
             return;
           } // cache hit
         } // query opcode
@@ -618,7 +620,7 @@ void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t&)
   int newsock = accept(fd, (struct sockaddr*)&addr, &addrlen);
   if (newsock >= 0) {
     if (MT->numProcesses() > g_maxMThreads) {
-      g_stats.overCapacityDrops++;
+      t_Counters.at(rec::Counter::overCapacityDrops)++;
       try {
         closesocket(newsock);
       }
@@ -646,7 +648,7 @@ void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t&)
         SLOG(g_log << Logger::Error << "[" << MT->getTid() << "] dropping TCP query from " << mappedSource.toString() << ", address neither matched by allow-from nor proxy-protocol-from" << endl,
              g_slogtcpin->info(Logr::Error, "dropping TCP query address neither matched by allow-from nor proxy-protocol-from", "source", Logging::Loggable(mappedSource)));
 
-      g_stats.unauthorizedTCP++;
+      t_Counters.at(rec::Counter::unauthorizedTCP)++;
       try {
         closesocket(newsock);
       }
@@ -658,7 +660,7 @@ void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t&)
     }
 
     if (g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
-      g_stats.tcpClientOverflow++;
+      t_Counters.at(rec::Counter::tcpClientOverflow)++;
       try {
         closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
       }
diff --git a/pdns/recursordist/tcounters.hh b/pdns/recursordist/tcounters.hh
new file mode 120000 (symlink)
index 0000000..7bf637d
--- /dev/null
@@ -0,0 +1 @@
+../tcounters.hh
\ No newline at end of file
diff --git a/pdns/recursordist/test-rec-tcounters_cc.cc b/pdns/recursordist/test-rec-tcounters_cc.cc
new file mode 100644 (file)
index 0000000..a86cab0
--- /dev/null
@@ -0,0 +1,149 @@
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_NO_MAIN
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <boost/test/unit_test.hpp>
+
+#include <unistd.h>
+#include <thread>
+#include "rec-tcounters.hh"
+
+static rec::GlobalCounters global;
+static thread_local rec::TCounters tlocal(global);
+
+BOOST_AUTO_TEST_SUITE(test_rec_tcounters_cc)
+
+BOOST_AUTO_TEST_CASE(destruct)
+{
+  global.reset();
+
+  const size_t count = 100000;
+  std::thread thread1([] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+    }
+  });
+  std::thread thread2([] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::nxDomains);
+    }
+  });
+  thread1.join();
+  thread2.join();
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::servFails), count);
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::nxDomains), count);
+}
+
+BOOST_AUTO_TEST_CASE(update_fast)
+{
+  global.reset();
+
+  std::atomic<uint64_t> done{};
+
+  const size_t count = 10000000;
+  std::thread thread1([&done] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(1.1);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+    }
+    done++;
+  });
+  std::thread thread2([&done] {
+    for (size_t i = 0; i < count / 2; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(2.2);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+    }
+    done++;
+  });
+  std::thread thread3([&done] {
+    while (done < 2) {
+      auto counts = global.aggregatedSnap();
+      BOOST_CHECK_EQUAL(counts.uint64Count[0], counts.uint64Count[1]);
+      auto avg = counts.at(rec::DoubleWAvgCounter::avgLatencyUsec).avg;
+      BOOST_CHECK(avg == 0.0 || (avg >= 1.1 && avg <= 2.2));
+    }
+  });
+  thread1.join();
+  thread2.join();
+  thread3.join();
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::servFails), count + count / 2);
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::nxDomains), count + count / 2);
+  auto avg = global.avg(rec::DoubleWAvgCounter::avgLatencyUsec);
+  BOOST_CHECK(avg >= 1.1 && avg <= 2.2);
+}
+
+BOOST_AUTO_TEST_CASE(update_with_sleep)
+{
+
+  global.reset();
+
+  std::atomic<int> done{};
+
+  const size_t count = 1000000;
+  std::thread thread1([&done] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(1.1);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+      struct timespec interval
+      {
+        0, random() % 5000
+      };
+      nanosleep(&interval, nullptr);
+    }
+    done++;
+  });
+  std::thread thread2([&done] {
+    for (size_t i = 0; i < count / 2; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(2.2);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+      struct timespec interval
+      {
+        0, random() % 999
+      };
+      nanosleep(&interval, nullptr);
+    }
+    done++;
+  });
+  std::thread thread3([&done] {
+    while (done < 2) {
+      auto counts = global.aggregatedSnap();
+      BOOST_CHECK_EQUAL(counts.uint64Count[0], counts.uint64Count[1]);
+      auto avg = counts.at(rec::DoubleWAvgCounter::avgLatencyUsec).avg;
+      // std::cerr << avg << std::endl;
+      BOOST_CHECK(avg == 0.0 || (avg >= 1.1 && avg <= 2.2));
+      struct timespec interval
+      {
+        0, random() % 10000
+      };
+      nanosleep(&interval, nullptr);
+    }
+  });
+  thread1.join();
+  thread2.join();
+  thread3.join();
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::servFails), count + count / 2);
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::nxDomains), count + count / 2);
+  auto avg = global.avg(rec::DoubleWAvgCounter::avgLatencyUsec);
+  BOOST_CHECK(avg >= 1.1 && avg <= 2.2);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
index ae2c062132927231100d4783bdd5deff25e291fc..4d47fcb0fcf9b31abd7e12de0b787a62cb79f948 100644 (file)
@@ -29,10 +29,6 @@ ArgvMap& arg()
   return theArg;
 }
 
-void primeRootNSZones(DNSSECMode, unsigned int)
-{
-}
-
 BaseLua4::~BaseLua4()
 {
 }
index 5fe1dbf69bba17c6bd47e9093deaf56c67cd04df..039ae2980149ac6f08ec588b9e7a6827b67481f7 100644 (file)
@@ -38,6 +38,9 @@
 #include "validate-recursor.hh"
 #include "rec-taskqueue.hh"
 
+rec::GlobalCounters g_Counters;
+thread_local rec::TCounters t_Counters(g_Counters);
+
 template<class T>
 class fails_t : public boost::noncopyable
 {
@@ -427,18 +430,6 @@ unsigned int SyncRes::s_serverdownthrottletime;
 unsigned int SyncRes::s_nonresolvingnsmaxfails;
 unsigned int SyncRes::s_nonresolvingnsthrottletime;
 unsigned int SyncRes::s_ecscachelimitttl;
-pdns::stat_t SyncRes::s_authzonequeries;
-pdns::stat_t SyncRes::s_queries;
-pdns::stat_t SyncRes::s_outgoingtimeouts;
-pdns::stat_t SyncRes::s_outgoing4timeouts;
-pdns::stat_t SyncRes::s_outgoing6timeouts;
-pdns::stat_t SyncRes::s_outqueries;
-pdns::stat_t SyncRes::s_tcpoutqueries;
-pdns::stat_t SyncRes::s_dotoutqueries;
-pdns::stat_t SyncRes::s_throttledqueries;
-pdns::stat_t SyncRes::s_dontqueries;
-pdns::stat_t SyncRes::s_qnameminfallbacksuccess;
-pdns::stat_t SyncRes::s_unreachables;
 pdns::stat_t SyncRes::s_ecsqueries;
 pdns::stat_t SyncRes::s_ecsresponses;
 std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize4;
@@ -486,11 +477,11 @@ static inline std::string fmtfloat(const char* fmt, double f)
 static inline void accountAuthLatency(uint64_t usec, int family)
 {
   if (family == AF_INET) {
-    g_stats.auth4Answers(usec);
-    g_stats.cumulativeAuth4Answers(usec);
+    t_Counters.at(rec::Histogram::auth4Answers)(usec);
+    t_Counters.at(rec::Histogram::cumulativeAuth4Answers)(usec);
   } else  {
-    g_stats.auth6Answers(usec);
-    g_stats.cumulativeAuth6Answers(usec);
+    t_Counters.at(rec::Histogram::auth6Answers)(usec);
+    t_Counters.at(rec::Histogram::cumulativeAuth6Answers)(usec);
   }
 }
 
@@ -694,7 +685,7 @@ int SyncRes::beginResolve(const DNSName &qname, const QType qtype, QClass qclass
 {
   d_eventTrace.add(RecEventTrace::SyncRes);
   vState state = vState::Indeterminate;
-  s_queries++;
+  t_Counters.at(rec::Counter::syncresqueries)++;
   d_wasVariable=false;
   d_wasOutOfBand=false;
   d_cutStates.clear();
@@ -726,7 +717,7 @@ int SyncRes::beginResolve(const DNSName &qname, const QType qtype, QClass qclass
 
   if (shouldValidate()) {
     if (d_queryValidationState != vState::Indeterminate) {
-      g_stats.dnssecValidations++;
+      t_Counters.at(rec::Counter::dnssecValidations)++;
     }
     auto xdnssec = g_xdnssec.getLocal();
     if (xdnssec->check(qname)) {
@@ -993,7 +984,7 @@ int SyncRes::AuthDomain::getRecords(const DNSName& qname, const QType qtype, std
 bool SyncRes::doOOBResolve(const AuthDomain& domain, const DNSName &qname, const QType qtype, vector<DNSRecord>&ret, int& res)
 {
   d_authzonequeries++;
-  s_authzonequeries++;
+  t_Counters.at(rec::Counter::authzonequeries)++;
 
   res = domain.getRecords(qname, qtype, ret);
   return true;
@@ -1516,7 +1507,7 @@ LWResult::Result SyncRes::asyncresolveWrapper(const ComboAddress& ip, bool ednsM
   for (int tries = 0; tries < 2; ++tries) {
 
     if (mode == EDNSStatus::NOEDNS) {
-      g_stats.noEdnsOutQueries++;
+      t_Counters.at(rec::Counter::noEdnsOutQueries)++;
       EDNSLevel = 0; // level != mode
     }
     else if (ednsMANDATORY || mode != EDNSStatus::NOEDNS) {
@@ -1772,7 +1763,7 @@ int SyncRes::doResolve(const DNSName &qname, const QType qtype, vector<DNSRecord
         res = doResolveNoQNameMinimization(qname, qtype, ret, depth/2, beenthere, state);
 
         if(res == RCode::NoError) {
-          s_qnameminfallbacksuccess++;
+          t_Counters.at(rec::Counter::qnameminfallbacksuccess)++;
         }
 
         QLOG("Step5 End resolve: " << RCode::to_s(res) << "/" << ret.size());
@@ -1853,7 +1844,7 @@ int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType qtyp
 
               d_totUsec += lwr.d_usec;
               accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
-              ++g_stats.authRCode.at(lwr.d_rcode);
+              ++t_Counters.at(rec::RCode::auth).rcodeCounters.at(static_cast<uint8_t>(lwr.d_rcode));
               if (fromCache)
                 *fromCache = true;
 
@@ -3242,7 +3233,7 @@ void SyncRes::handlePolicyHit(const std::string& prefix, const DNSName& qname, c
       return;
 
   case DNSFilterEngine::PolicyKind::Drop:
-    ++g_stats.policyDrops;
+    ++t_Counters.at(rec::Counter::policyDrops);
     throw ImmediateQueryDropException();
 
   case DNSFilterEngine::PolicyKind::NXDOMAIN:
@@ -3417,12 +3408,14 @@ bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress&
 {
   if (isThrottled(d_now.tv_sec, remoteIP)) {
     LOG(prefix<<qname<<": server throttled "<<endl);
-    s_throttledqueries++; d_throttledqueries++;
+    t_Counters.at(rec::Counter::throttledqueries)++;
+    d_throttledqueries++;
     return true;
   }
   else if (isThrottled(d_now.tv_sec, remoteIP, qname, qtype)) {
     LOG(prefix<<qname<<": query throttled "<<remoteIP.toString()<<", "<<qname<<"; "<<qtype<<endl);
-    s_throttledqueries++; d_throttledqueries++;
+    t_Counters.at(rec::Counter::throttledqueries)++;
+    d_throttledqueries++;
     return true;
   }
   else if(!pierceDontQuery && s_dontQuery && s_dontQuery->match(&remoteIP)) {
@@ -3432,14 +3425,14 @@ bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress&
     auto it = getBestAuthZone(&forwardCandidate);
     if (it == t_sstorage.domainmap->end()) {
       LOG(prefix<<qname<<": not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
-      s_dontqueries++;
+      t_Counters.at(rec::Counter::dontqueries)++;
       return true;
     } else {
       // The name (from the cache) is forwarded, but is it forwarded to an IP in known forwarders?
       const auto& ips = it->second.d_servers;
       if (std::find(ips.cbegin(), ips.cend(), remoteIP) == ips.cend()) {
         LOG(prefix<<qname<<": not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
-        s_dontqueries++;
+        t_Counters.at(rec::Counter::dontqueries)++;
         return true;
       } else {
         LOG(prefix<<qname<<": sending query to " << remoteIP.toString() << ", blocked by 'dont-query' but a forwarding/auth case" << endl);
@@ -5148,7 +5141,7 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
 {
   bool chained = false;
   LWResult::Result resolveret = LWResult::Result::Success;
-  s_outqueries++;
+  t_Counters.at(rec::Counter::outqueries)++;
   d_outqueries++;
   checkMaxQperQ(qname);
 
@@ -5159,11 +5152,11 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
   if(doTCP) {
     if (doDoT) {
       LOG(prefix<<qname<<": using DoT with "<< remoteIP.toStringWithPort() <<endl);
-      s_dotoutqueries++;
+      t_Counters.at(rec::Counter::dotoutqueries)++;
       d_dotoutqueries++;
     } else {
       LOG(prefix<<qname<<": using TCP with "<< remoteIP.toStringWithPort() <<endl);
-      s_tcpoutqueries++;
+      t_Counters.at(rec::Counter::tcpoutqueries)++;
       d_tcpoutqueries++;
     }
   }
@@ -5201,7 +5194,7 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
 
   d_totUsec += lwr.d_usec;
   accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
-  ++g_stats.authRCode.at(lwr.d_rcode);
+  ++t_Counters.at(rec::RCode::auth).rcodeCounters.at(static_cast<uint8_t>(lwr.d_rcode));
 
   if (!dontThrottle) {
     auto dontThrottleNames = g_dontThrottleNames.getLocal();
@@ -5216,12 +5209,12 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
 
       LOG(prefix<<qname<<": timeout resolving after "<<lwr.d_usec/1000.0<<"msec "<< (doTCP ? "over TCP" : "")<<endl);
       d_timeouts++;
-      s_outgoingtimeouts++;
+      t_Counters.at(rec::Counter::outgoingtimeouts)++;
 
       if(remoteIP.sin4.sin_family == AF_INET)
-        s_outgoing4timeouts++;
+        t_Counters.at(rec::Counter::outgoing4timeouts)++;
       else
-        s_outgoing6timeouts++;
+        t_Counters.at(rec::Counter::outgoing6timeouts)++;
 
       if(t_timeouts)
         t_timeouts->push_back(remoteIP);
@@ -5229,14 +5222,14 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
     else if (resolveret == LWResult::Result::OSLimitError) {
       /* OS resource limit reached */
       LOG(prefix<<qname<<": hit a local resource limit resolving"<< (doTCP ? " over TCP" : "")<<", probable error: "<<stringerror()<<endl);
-      g_stats.resourceLimits++;
+      t_Counters.at(rec::Counter::resourceLimits)++;
     }
     else if (resolveret == LWResult::Result::Spoofed) {
       spoofed = true;
     }
     else {
       /* LWResult::Result::PermanentError */
-      s_unreachables++;
+      t_Counters.at(rec::Counter::unreachables)++;
       d_unreachables++;
       // XXX questionable use of errno
       LOG(prefix<<qname<<": error resolving from "<<remoteIP.toString()<< (doTCP ? " over TCP" : "") <<", possible error: "<<stringerror()<< endl);
@@ -5570,7 +5563,7 @@ int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, con
           LOG(prefix<<qname<<": Ageing nameservers for level '"<<auth<<"', next query might succeed"<<endl);
 
           if(g_recCache->doAgeCache(d_now.tv_sec, auth, QType::NS, 10))
-            g_stats.nsSetInvalidations++;
+            t_Counters.at(rec::Counter::nsSetInvalidations)++;
         }
         return -1;
       }
index 904089aa6dbac68f2241012661f768ab1d035f74..66b2508d9577bee9e4ac21b0266f1638bdc937de 100644 (file)
@@ -53,6 +53,7 @@
 #include "tcpiohandler.hh"
 #include "rec-eventtrace.hh"
 #include "logr.hh"
+#include "rec-tcounters.hh"
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -458,18 +459,6 @@ public:
 
   static thread_local ThreadLocalStorage t_sstorage;
 
-  static pdns::stat_t s_queries;
-  static pdns::stat_t s_outgoingtimeouts;
-  static pdns::stat_t s_outgoing4timeouts;
-  static pdns::stat_t s_outgoing6timeouts;
-  static pdns::stat_t s_throttledqueries;
-  static pdns::stat_t s_dontqueries;
-  static pdns::stat_t s_qnameminfallbacksuccess;
-  static pdns::stat_t s_authzonequeries;
-  static pdns::stat_t s_outqueries;
-  static pdns::stat_t s_tcpoutqueries;
-  static pdns::stat_t s_dotoutqueries;
-  static pdns::stat_t s_unreachables;
   static pdns::stat_t s_ecsqueries;
   static pdns::stat_t s_ecsresponses;
   static std::map<uint8_t, pdns::stat_t> s_ecsResponsesBySubnetSize4;
@@ -760,79 +749,17 @@ struct PacketIDBirthdayCompare
 };
 extern std::unique_ptr<MemRecursorCache> g_recCache;
 
+extern rec::GlobalCounters g_Counters;
+extern thread_local rec::TCounters t_Counters;
+
 struct RecursorStats
 {
-  pdns::stat_t servFails;
-  pdns::stat_t nxDomains;
-  pdns::stat_t noErrors;
-  pdns::AtomicHistogram answers;
-  pdns::AtomicHistogram auth4Answers;
-  pdns::AtomicHistogram auth6Answers;
-  pdns::AtomicHistogram ourtime;
-  pdns::AtomicHistogram cumulativeAnswers;
-  pdns::AtomicHistogram cumulativeAuth4Answers;
-  pdns::AtomicHistogram cumulativeAuth6Answers;
-  pdns::stat_t_trait<double> avgLatencyUsec;
-  pdns::stat_t_trait<double> avgLatencyOursUsec;
-  pdns::stat_t qcounter;     // not increased for unauth packets
-  pdns::stat_t ipv6qcounter;
-  pdns::stat_t tcpqcounter;
-  pdns::stat_t unauthorizedUDP;  // when this is increased, qcounter isn't
-  pdns::stat_t unauthorizedTCP;  // when this is increased, qcounter isn't
-  pdns::stat_t sourceDisallowedNotify;  // when this is increased, qcounter is also
-  pdns::stat_t zoneDisallowedNotify;  // when this is increased, qcounter is also
-  pdns::stat_t policyDrops;
-  pdns::stat_t tcpClientOverflow;
-  pdns::stat_t clientParseError;
-  pdns::stat_t serverParseError;
-  pdns::stat_t tooOldDrops;
-  pdns::stat_t truncatedDrops;
-  pdns::stat_t queryPipeFullDrops;
-  pdns::stat_t unexpectedCount;
-  pdns::stat_t caseMismatchCount;
-  pdns::stat_t spoofCount;
-  pdns::stat_t resourceLimits;
-  pdns::stat_t overCapacityDrops;
-  pdns::stat_t ipv6queries;
-  pdns::stat_t chainResends;
-  pdns::stat_t nsSetInvalidations;
-  pdns::stat_t ednsPingMatches;
-  pdns::stat_t ednsPingMismatches;
-  pdns::stat_t noPingOutQueries, noEdnsOutQueries;
-  pdns::stat_t packetCacheHits;
-  pdns::stat_t noPacketError;
-  pdns::stat_t ignoredCount;
-  pdns::stat_t emptyQueriesCount;
   time_t startupTime{time(nullptr)};
-  pdns::stat_t dnssecQueries;
-  pdns::stat_t dnssecAuthenticDataQueries;
-  pdns::stat_t dnssecCheckDisabledQueries;
-  pdns::stat_t variableResponses;
-  pdns::stat_t maxMThreadStackUsage;
-  pdns::stat_t dnssecValidations; // should be the sum of all dnssecResult* stats
+  // XXX Convert counter below to be part of rec::Counters
   std::map<vState, pdns::stat_t > dnssecResults;
   std::map<vState, pdns::stat_t > xdnssecResults;
   std::map<DNSFilterEngine::PolicyKind, pdns::stat_t > policyResults;
   LockGuarded<std::unordered_map<std::string, pdns::stat_t>> policyHits;
-  pdns::stat_t rebalancedQueries{0};
-  pdns::stat_t proxyProtocolInvalidCount{0};
-  pdns::stat_t nodLookupsDroppedOversize{0};
-  pdns::stat_t dns64prefixanswers{0};
-  pdns::stat_t maintenanceUsec{0};
-  pdns::stat_t maintenanceCalls{0};
-  std::array<pdns::stat_t, 16> authRCode;
-
-  RecursorStats() :
-    answers("answers", { 1000, 10000, 100000, 1000000 }),
-    auth4Answers("auth4answers", { 1000, 10000, 100000, 1000000 }),
-    auth6Answers("auth6answers", { 1000, 10000, 100000, 1000000 }),
-    ourtime("ourtime", { 1000, 2000, 4000, 8000, 16000, 32000 }),
-    cumulativeAnswers("cumul-clientanswers-", 10, 19),
-    // These two will be merged when outputting
-    cumulativeAuth4Answers("cumul-authanswers-", 1000, 13),
-    cumulativeAuth6Answers("cumul-authanswers-", 1000, 13)
-  {
-  }
 };
 
 //! represents a running TCP/IP client session
diff --git a/pdns/tcounters.hh b/pdns/tcounters.hh
new file mode 100644 (file)
index 0000000..c6956c6
--- /dev/null
@@ -0,0 +1,260 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#pragma once
+
+#include <sys/time.h>
+#include <array>
+#include <set>
+#include <unistd.h>
+
+#include "lock.hh"
+
+namespace pdns
+{
+// We keep three sets of related counters:
+//
+// 1. The current counters (thread-local, updated individually by thread code very often)
+// 2. The snapshot counters (thread local, updated by thread code in one single mutex protected copy)
+// 3. The history counters (global) to keep track of the counters of deleted threads
+
+// We have two main clasess: one that holds the thread local counters
+// (both current and snapshot ones) and one that aggregates the
+// values for all threads adn keeps the history counters.
+
+// The thread local current counters are the ones updated by
+// performance critical code.  Every once in a while, all values in the
+// current counters are copied to the snapshot thread local copies in
+// a thread safe way.
+
+// The snapshot counters are aggregated by the GlobalCounters
+// class, as these can be accessed safely from multiple threads.
+
+// Make sure to call the thread local tlocal.updatesAtomics() once
+// in a while. This will fill the snapshot values for that thread if some
+// time has passed since the last snap update.
+
+// To fet aggregate value call globals.sum(counter1) or
+// globals.avg(counter2), or any aggreggation function.  If multiple
+// counters need to be collected in a consistent way:
+// auto data = globals.aggregatedSnap();
+//
+// Note that the aggregate values can mix somewhat older thread-local
+// with newer thread-local info from another thread. So it is possible
+// to see the following:
+//
+// If thread T1 increments "received" and then passes the packet to
+// thread T2 that increments "processed", it may happen that the value
+// of "processed" observed by sum() is higher than "received", as T1
+// might not have called updateSnap() yet while T2 did.  To avoid this
+// inconsistentcy, be careful to update related counters in a single
+// thread only.
+
+// For an example of the use of these templates, see rec-tcounters.hh
+
+template <typename Counters>
+class TLocalCounters;
+
+template <typename Counters>
+class GlobalCounters
+{
+public:
+  // Register a thread local set of values
+  void subscribe(TLocalCounters<Counters>* ptr)
+  {
+    auto lock = d_guarded.lock();
+    lock->d_instances.emplace(ptr);
+  }
+
+  // Unregister, typically done when a thread exits
+  void unsubscribe(TLocalCounters<Counters>* ptr, const Counters& data)
+  {
+    auto lock = d_guarded.lock();
+    lock->d_instances.erase(ptr);
+    lock->d_history.merge(data);
+  }
+
+  // Two ways of computing aggregated values for a specific counter: simple additions of all thread data, or taking weighted averages into account
+  template <typename Enum>
+  auto sum(Enum index);
+  template <typename Enum>
+  auto avg(Enum index);
+  template <typename Enum>
+  auto max(Enum index);
+
+  // Aggregate all counter data for all threads
+  Counters aggregatedSnap();
+
+  // Reset history
+  void reset()
+  {
+    auto lock = d_guarded.lock();
+    lock->d_history = Counters();
+  }
+
+private:
+  struct Guarded
+  {
+    // We have x instances, normally one per thread
+    std::set<TLocalCounters<Counters>*> d_instances;
+    // If an instance gets deleted because its thread is cleaned up, the values
+    // are accumulated in d_history
+    Counters d_history;
+  };
+  LockGuarded<Guarded> d_guarded;
+};
+
+template <typename Counters>
+class TLocalCounters
+{
+public:
+  static const suseconds_t defaultSnapUpdatePeriodus = 100000;
+  TLocalCounters(GlobalCounters<Counters>& collector, timeval interval = timeval{0, defaultSnapUpdatePeriodus}) :
+    d_collector(collector), d_interval(interval)
+  {
+    collector.subscribe(this);
+  }
+
+  ~TLocalCounters()
+  {
+    d_collector.unsubscribe(this, d_current);
+  }
+
+  TLocalCounters(const TLocalCounters&) = delete;
+  TLocalCounters(TLocalCounters&&) = delete;
+  TLocalCounters& operator=(const TLocalCounters&) = delete;
+  TLocalCounters& operator=(TLocalCounters&&) = delete;
+
+  template <typename Enum>
+  auto& at(Enum index)
+  {
+    return d_current.at(index);
+  }
+
+  template <typename Enum>
+  auto snapAt(Enum index)
+  {
+    return d_snapshot.lock()->at(index);
+  }
+
+  [[nodiscard]] Counters getSnap()
+  {
+    return *(d_snapshot.lock());
+  }
+
+  bool updateSnap(const timeval& tv_now, bool force = false)
+  {
+    timeval tv_diff{};
+
+    if (!force) {
+      timersub(&tv_now, &d_last, &tv_diff);
+    }
+    if (force || timercmp(&tv_diff, &d_interval, >=)) {
+      // It's a copy
+      *(d_snapshot.lock()) = d_current;
+      d_last = tv_now;
+      return true;
+    }
+    return false;
+  }
+
+  bool updateSnap(bool force = false)
+  {
+    timeval tv_now{};
+
+    if (!force) {
+      gettimeofday(&tv_now, nullptr);
+    }
+    return updateSnap(tv_now, force);
+  }
+
+private:
+  GlobalCounters<Counters>& d_collector;
+  Counters d_current;
+  LockGuarded<Counters> d_snapshot;
+  timeval d_last{0, 0};
+  const timeval d_interval;
+};
+
+// Sum for a specific index
+// In the future we might the move the specifics of computing an aggregated value to the
+// app specific Counters class
+template <typename Counters>
+template <typename Enum>
+auto GlobalCounters<Counters>::sum(Enum index)
+{
+  auto lock = d_guarded.lock();
+  auto sum = lock->d_history.at(index);
+  for (const auto& instance : lock->d_instances) {
+    sum += instance->snapAt(index);
+  }
+  return sum;
+}
+
+// Average for a specific index
+// In the future we might the move the specifics of computing an aggregated value to the
+// app specific Counters class
+template <typename Counters>
+template <typename Enum>
+auto GlobalCounters<Counters>::avg(Enum index)
+{
+  auto lock = d_guarded.lock();
+  auto wavg = lock->d_history.at(index);
+  auto sum = wavg.avg * wavg.weight;
+  auto count = wavg.weight;
+  for (const auto& instance : lock->d_instances) {
+    auto val = instance->snapAt(index);
+    count += val.weight;
+    sum += val.avg * val.weight;
+  }
+  return count > 0 ? sum / count : 0;
+}
+
+// Max for a specific  index
+// In the future we might the move the specifics of computing an aggregated value to the
+// app specific Counters class
+template <typename Counters>
+template <typename Enum>
+auto GlobalCounters<Counters>::max(Enum index)
+{
+  auto lock = d_guarded.lock();
+  uint64_t max = 0; // ignore history
+  for (const auto& instance : lock->d_instances) {
+    max = std::max(instance->snapAt(index), max);
+  }
+  return max;
+}
+
+// Get a consistent snap of *all* aggregated values
+template <typename Counters>
+Counters GlobalCounters<Counters>::aggregatedSnap()
+{
+  auto lock = d_guarded.lock();
+  Counters ret = lock->d_history;
+  for (const auto& instance : lock->d_instances) {
+    auto snap = instance->getSnap();
+    ret.merge(snap);
+  }
+  return ret;
+}
+
+}
index 2576b0955b5802a2214ab22928b6c365983ad687..165208b7926aed4ab3b0d9073a3814acb8640975 100644 (file)
@@ -39,7 +39,6 @@ class RecursorTest(AssertEqualDNSMessageMixin, unittest.TestCase):
 
     _confdir = 'recursor'
 
-    _recursorStartupDelay = 2.0
     _recursorPort = 5300
 
     _recursor = None
@@ -649,11 +648,11 @@ distributor-threads={threads}""".format(confdir=confdir,
             raise AssertionError('%s failed (%d)' % (recursorcmd, cls._recursor.returncode))
 
     @classmethod
-    def wipeRecursorCache(cls, confdir):
+    def wipeRecursorCache(cls, confdir, name='.$'):
         rec_controlCmd = [os.environ['RECCONTROL'],
                           '--config-dir=%s' % confdir,
                           'wipe-cache',
-                          '.$']
+                          name]
         try:
             subprocess.check_output(rec_controlCmd, stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
index a4640176c69df64241f9d6321345128d79dfed6c..808bbdd3d8473e367b6a4349924b2209d29c9313 100644 (file)
@@ -11,6 +11,7 @@ class AggressiveNSECCacheBase(RecursorTest):
     _wsTimeout = 10
     _wsPassword = 'secretpassword'
     _apiKey = 'secretapikey'
+    #_recursorStartupDelay = 4.0
     _config_template = """
     dnssec=validate
     aggressive-nsec-cache-size=10000
@@ -19,12 +20,14 @@ class AggressiveNSECCacheBase(RecursorTest):
     webserver-address=127.0.0.1
     webserver-password=%s
     api-key=%s
+    devonly-regression-test-mode
     """ % (_wsPort, _wsPassword, _apiKey)
 
     @classmethod
     def wipe(cls):
         confdir = os.path.join('configs', cls._confdir)
-        cls.wipeRecursorCache(confdir)
+        # Only wipe examples, as wiping the root triggers root NS refreshes
+        cls.wipeRecursorCache(confdir, "example$")
 
     def getMetric(self, name):
         headers = {'x-api-key': self._apiKey}
@@ -42,8 +45,8 @@ class AggressiveNSECCacheBase(RecursorTest):
         self.assertTrue(False)
 
     def testNoData(self):
-
         self.wipe()
+
         # first we query a non-existent type, to get the NSEC in our cache
         entries = self.getMetric('aggressive-nsec-cache-entries')
         res = self.sendQuery('host1.secure.example.', 'TXT')
@@ -72,8 +75,8 @@ class AggressiveNSECCacheNSEC(AggressiveNSECCacheBase):
     # we can't use the same tests for NSEC and NSEC3 because the hashed NSEC3s
     # do not deny the same names than the non-hashed NSECs do
     def testNXD(self):
-
         self.wipe()
+
         # first we query a non-existent name, to get the needed NSECs (name + widcard) in our cache
         entries = self.getMetric('aggressive-nsec-cache-entries')
         hits = self.getMetric('aggressive-nsec-cache-nsec-hits')
@@ -100,8 +103,8 @@ class AggressiveNSECCacheNSEC(AggressiveNSECCacheBase):
         self.assertGreater(self.getMetric('aggressive-nsec-cache-nsec-hits'), hits)
 
     def testWildcard(self):
-
         self.wipe()
+
         # first we query a non-existent name, but for which a wildcard matches,
         # to get the NSEC in our cache
         res = self.sendQuery('test1.wildcard.secure.example.', 'A')
@@ -231,6 +234,7 @@ class AggressiveNSECCacheNSEC3(AggressiveNSECCacheBase):
             raise AssertionError('%s failed (%d): %s' % (pdnsutilCmd, e.returncode, e.output))
 
     def testNXD(self):
+        self.wipe()
 
         # first we query a non-existent name, to get the needed NSEC3s in our cache
         res = self.sendQuery('host2.secure.example.', 'TXT')
@@ -250,6 +254,7 @@ class AggressiveNSECCacheNSEC3(AggressiveNSECCacheBase):
         self.assertEqual(nbQueries, self.getMetric('all-outqueries'))
 
     def testWildcard(self):
+        self.wipe()
 
         # first let's get the SOA and wildcard NSEC in our cache by asking a name that matches the wildcard
         # but a type that does not exist
@@ -297,6 +302,8 @@ class AggressiveNSECCacheNSEC3(AggressiveNSECCacheBase):
         self.assertEqual(nbQueries, self.getMetric('all-outqueries'))
 
     def test_OptOut(self):
+        self.wipe()
+
         # query a name in an opt-out zone
         res = self.sendQuery('ns2.optout.example.', 'A')
         self.assertRcodeEqual(res, dns.rcode.NXDOMAIN)
index 62f6537e2c559f3831894feb83a360e95f29cb09..b9d1b0b1562f34e0fbce979a5a4670850be46407 100644 (file)
@@ -46,6 +46,7 @@ webserver-port=%d
 webserver-address=127.0.0.1
 webserver-password=%s
 api-key=%s
+devonly-regression-test-mode
 """ % (_wsPort, _wsPassword, _apiKey)
 
     def testRootNXTrust(self):
@@ -94,6 +95,7 @@ webserver-port=%d
 webserver-address=127.0.0.1
 webserver-password=%s
 api-key=%s
+devonly-regression-test-mode
 """ % (_wsPort, _wsPassword, _apiKey)
 
     def testRootNXTrust(self):
index 0994a47ec9c0e4709a6e970aa16ddf93f6a867d3..1026001d9ecc35a4e9ff832f6707808dd760ee2e 100644 (file)
@@ -12,6 +12,7 @@ class testSimpleDoT(RecursorTest):
     _config_template = """
 dnssec=validate
 dot-to-auth-names=powerdns.com
+devonly-regression-test-mode
     """
 
     _roothints = None
index 95e2fbca04033713058343eace766212d809c412..d839d43084ecec58e43d1ff642611ced797c72ed 100644 (file)
@@ -12,6 +12,7 @@ class testSimpleForwardOverDoT(RecursorTest):
     _config_template = """
 dnssec=validate
 forward-zones-recurse=.=9.9.9.9:853
+devonly-regression-test-mode
     """
 
     @classmethod