Introducing TCounters

author Otto Moerbeek <otto.moerbeek@open-xchange.com>

Tue, 8 Nov 2022 10:10:54 +0000 (11:10 +0100)

committer Otto Moerbeek <otto.moerbeek@open-xchange.com>

Mon, 12 Dec 2022 12:14:02 +0000 (13:14 +0100)
author Otto Moerbeek <otto.moerbeek@open-xchange.com>
Tue, 8 Nov 2022 10:10:54 +0000 (11:10 +0100)
committer Otto Moerbeek <otto.moerbeek@open-xchange.com>
Mon, 12 Dec 2022 12:14:02 +0000 (13:14 +0100)
diff --git a/pdns/histogram.hh b/pdns/histogram.hh

index 7cfbcac343e27905ea03b6f9f69822c977903efc..931841eb5d60fdc09ff87fabc1b5baa58aa1cb83 100644 (file)
--- a/pdns/histogram.hh
+++ b/pdns/histogram.hh
@@ -21,6 +21,7 @@
   */
  #pragma once
  
+#include <cassert>
  #include <algorithm>
  #include <limits>
  #include <stdexcept>
@@ -35,8 +36,10 @@ namespace pdns
  // By convention, we are using microsecond units
  struct Bucket
  {
-  const std::string d_name;
-  const uint64_t d_boundary{0};
+  Bucket(std::string name, uint64_t boundary, uint64_t val) :
+    d_name(std::move(name)), d_boundary(boundary), d_count(val) {}
+  std::string d_name;
+  uint64_t d_boundary{0};
    mutable uint64_t d_count{0};
  };
  
@@ -147,9 +150,22 @@ public:
      d_sum += d;
    }
  
+  BaseHistogram& operator+=(const BaseHistogram& rhs)
+  {
+    assert(d_name == rhs.d_name);
+    assert(d_buckets.size() == rhs.d_buckets.size());
+    for (size_t bucket = 0; bucket < d_buckets.size(); ++bucket) {
+      assert(d_buckets[bucket].d_name == rhs.d_buckets[bucket].d_name);
+      assert(d_buckets[bucket].d_boundary == rhs.d_buckets[bucket].d_boundary);
+      d_buckets[bucket].d_count += rhs.d_buckets[bucket].d_count;
+    }
+    d_sum += rhs.d_sum;
+    return *this;
+  }
+
  private:
    std::vector<B> d_buckets;
-  const std::string d_name;
+  std::string d_name;
    mutable SumType d_sum{0};
  
    std::vector<uint64_t> to125(uint64_t start, int num)
diff --git a/pdns/lwres.cc b/pdns/lwres.cc

index 7cc80f4e5961c58379e3dea697db168fe496369c..aa1c7889c159d80ad985180e2ba3b60216f714a0 100644 (file)
--- a/pdns/lwres.cc
+++ b/pdns/lwres.cc
@@ -466,7 +466,7 @@ static LWResult::Result asyncresolve(const ComboAddress& ip, const DNSName& doma
    if(!doTCP) {
      int queryfd;
      if (ip.sin4.sin_family==AF_INET6) {
-      g_stats.ipv6queries++;
+      t_Counters.at(rec::Counter::ipv6queries)++;
      }
  
      ret = asendto((const char*)&*vpacket.begin(), vpacket.size(), 0, ip, qid, domain, type, &queryfd);
@@ -624,7 +624,7 @@ static LWResult::Result asyncresolve(const ComboAddress& ip, const DNSName& doma
  
      lwr->d_rcode = RCode::FormErr;
      lwr->d_validpacket = false;
-    g_stats.serverParseError++;
+    t_Counters.at(rec::Counter::serverParseError)++;
  
      if(outgoingLoggers) {
        logIncomingResponse(outgoingLoggers, context ? context->d_initialRequestId : boost::none, uuid, ip, domain, type, qid, doTCP, dnsOverTLS, srcmask, len, lwr->d_rcode, lwr->d_records, queryTime, exportTypes);
@@ -637,7 +637,7 @@ static LWResult::Result asyncresolve(const ComboAddress& ip, const DNSName& doma
           g_slogout->info(Logr::Notice, "Unknown error parsing packet from remote server", "server", Logging::Loggable(ip)));
    }
  
-  g_stats.serverParseError++;
+  t_Counters.at(rec::Counter::serverParseError)++;
  
   out:
    if (!lwr->d_rcode) {
diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc

index ee826d784da70d37efe5adc2a3f7d1aac880fba5..b88c1987e3846efbf924c2e088f7271d415803c8 100644 (file)
--- a/pdns/pdns_recursor.cc
+++ b/pdns/pdns_recursor.cc
@@ -338,7 +338,7 @@ LWResult::Result arecvfrom(PacketBuffer& packet, int flags, const ComboAddress&
                               "nearmisslimit", Logging::Loggable(nearMissLimit),
                               "qname", Logging::Loggable(domain),
                               "from", Logging::Loggable(fromaddr)));
-      g_stats.spoofCount++;
+      t_Counters.at(rec::Counter::spoofCount)++;
        return LWResult::Result::Spoofed;
      }
  
@@ -366,13 +366,13 @@ static void updateResponseStats(int res, const ComboAddress& remote, unsigned in
        if (query && t_servfailqueryring) // packet cache
          t_servfailqueryring->push_back({*query, qtype});
      }
-    g_stats.servFails++;
+    ++t_Counters.at(rec::Counter::servFails);
      break;
    case RCode::NXDomain:
-    g_stats.nxDomains++;
+    ++t_Counters.at(rec::Counter::nxDomains);
      break;
    case RCode::NoError:
-    g_stats.noErrors++;
+    t_Counters.at(rec::Counter::noErrors)++;
      break;
    }
  }
@@ -505,7 +505,7 @@ static PolicyResult handlePolicyHit(const DNSFilterEngine::Policy& appliedPolicy
  
    case DNSFilterEngine::PolicyKind::Drop:
      tcpGuard.setDropOnIdle();
-    ++g_stats.policyDrops;
+    ++t_Counters.at(rec::Counter::policyDrops);
      return PolicyResult::Drop;
  
    case DNSFilterEngine::PolicyKind::NXDOMAIN:
@@ -594,7 +594,7 @@ static void sendNODLookup(Logr::log_t nodlogger, const DNSName& dname)
        if (g_logCommonErrors) {
          nodlogger->v(10)->error(Logr::Error, "DNSName too long", "Unable to send NOD lookup");
        }
-      ++g_stats.nodLookupsDroppedOversize;
+      ++t_Counters.at(rec::Counter::nodLookupsDroppedOversize);
        return;
      }
      nodlogger->v(10)->info(Logr::Debug, "Sending NOD lookup", "nodqname", Logging::Loggable(qname));
@@ -721,7 +721,7 @@ int getFakeAAAARecords(const DNSName& qname, ComboAddress prefix, vector<DNSReco
                  }),
                ret.end());
    }
-  g_stats.dns64prefixanswers++;
+  t_Counters.at(rec::Counter::dns64prefixanswers)++;
    return rcode;
  }
  
@@ -752,7 +752,7 @@ int getFakePTRRecords(const DNSName& qname, vector<DNSRecord>& ret)
    auto log = g_slog->withName("dns64")->withValues("method", Logging::Loggable("getPTR"));
    int rcode = directResolve(DNSName(newquery), QType::PTR, QClass::IN, ret, t_pdl, log);
  
-  g_stats.dns64prefixanswers++;
+  t_Counters.at(rec::Counter::dns64prefixanswers)++;
    return rcode;
  }
  
@@ -952,14 +952,14 @@ void startDoResolve(void* p)
        // Does the requestor want DNSSEC records?
        if (edo.d_extFlags & EDNSOpts::DNSSECOK) {
          DNSSECOK = true;
-        g_stats.dnssecQueries++;
+        t_Counters.at(rec::Counter::dnssecQueries)++;
        }
        if (dc->d_mdp.d_header.cd) {
          /* Per rfc6840 section 5.9, "When processing a request with
             the Checking Disabled (CD) bit set, a resolver SHOULD attempt
             to return all response data, even data that has failed DNSSEC
             validation. */
-        ++g_stats.dnssecCheckDisabledQueries;
+        ++t_Counters.at(rec::Counter::dnssecCheckDisabledQueries);
        }
        if (dc->d_mdp.d_header.ad) {
          /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
@@ -967,7 +967,7 @@ void startDoResolve(void* p)
             value of the AD bit in the response.  This allows a requester to
             indicate that it understands the AD bit without also requesting
             DNSSEC data via the DO bit. */
-        ++g_stats.dnssecAuthenticDataQueries;
+        ++t_Counters.at(rec::Counter::dnssecAuthenticDataQueries);
        }
      }
      else {
@@ -1165,7 +1165,7 @@ void startDoResolve(void* p)
        }
        catch (const ImmediateQueryDropException& e) {
          // XXX We need to export a protobuf message (and do a NOD lookup) if requested!
-        g_stats.policyDrops++;
+        t_Counters.at(rec::Counter::policyDrops)++;
          SLOG(g_log << Logger::Debug << "Dropping query because of a filtering policy " << makeLoginfo(dc) << endl,
               sr.d_slog->info(Logr::Debug, "Dropping query because of a filtering policy"));
          return;
@@ -1286,7 +1286,7 @@ void startDoResolve(void* p)
      if (res == -1) {
        pw.getHeader()->rcode = RCode::ServFail;
        // no commit here, because no record
-      g_stats.servFails++;
+      ++t_Counters.at(rec::Counter::servFails);
      }
      else {
        pw.getHeader()->rcode = res;
@@ -1584,7 +1584,7 @@ void startDoResolve(void* p)
  #endif /* NOD_ENABLED */
  
      if (variableAnswer || sr.wasVariable()) {
-      g_stats.variableResponses++;
+      t_Counters.at(rec::Counter::variableResponses)++;
      }
  
      if (t_protobufServers.servers && !(luaconfsLocal->protobufExportConfig.taggedOnly && appliedPolicy.getName().empty() && dc->d_policyTags.empty())) {
@@ -1753,19 +1753,19 @@ void startDoResolve(void* p)
        }
      }
  
-    g_stats.answers(spentUsec);
-    g_stats.cumulativeAnswers(spentUsec);
+    t_Counters.at(rec::Histogram::answers)(spentUsec);
+    t_Counters.at(rec::Histogram::cumulativeAnswers)(spentUsec);
  
      double newLat = spentUsec;
      newLat = min(newLat, g_networkTimeoutMsec * 1000.0); // outliers of several minutes exist..
-    g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + newLat / g_latencyStatSize;
+    t_Counters.at(rec::DoubleWAvgCounter::avgLatencyUsec).addToRollingAvg(newLat, g_latencyStatSize);
      // no worries, we do this for packet cache hits elsewhere
  
      if (spentUsec >= sr.d_totUsec) {
        uint64_t ourtime = spentUsec - sr.d_totUsec;
-      g_stats.ourtime(ourtime);
+      t_Counters.at(rec::Histogram::ourtime)(ourtime);
        newLat = ourtime; // usec
-      g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + newLat / g_latencyStatSize;
+      t_Counters.at(rec::DoubleWAvgCounter::avgLatencyOursUsec).addToRollingAvg(newLat, g_latencyStatSize);
      }
  
  #ifdef NOD_ENABLED
@@ -1807,9 +1807,13 @@ void startDoResolve(void* p)
           sr.d_slog->info(Logr::Error, "Any other exception in a resolver context"));
    }
  
+  if (g_regressionTestMode) {
+    t_Counters.updateSnap(g_regressionTestMode);
+  }
    runTaskOnce(g_logCommonErrors);
  
-  g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage.load());
+  t_Counters.at(rec::Counter::maxMThreadStackUsage) = max(MT->getMaxStackUsage(), t_Counters.at(rec::Counter::maxMThreadStackUsage));
+  t_Counters.updateSnap(g_regressionTestMode);
  }
  
  void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass,
@@ -1916,19 +1920,20 @@ bool checkForCacheHit(bool qnameParsed, unsigned int tag, const string& data,
        }
      }
  
-    g_stats.packetCacheHits++;
-    SyncRes::s_queries++;
+    t_Counters.at(rec::Counter::packetCacheHits)++;
+    t_Counters.at(rec::Counter::syncresqueries)++; // XXX
      ageDNSPacket(response, age);
      if (response.length() >= sizeof(struct dnsheader)) {
        const struct dnsheader* dh = reinterpret_cast<const dnsheader*>(response.data());
        updateResponseStats(dh->rcode, source, response.length(), 0, 0);
      }
-    g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
-    g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
+    // we assume 0 usec
+    t_Counters.at(rec::DoubleWAvgCounter::avgLatencyUsec).addToRollingAvg(0.0, g_latencyStatSize);
+    t_Counters.at(rec::DoubleWAvgCounter::avgLatencyOursUsec).addToRollingAvg(0.0, g_latencyStatSize);
  #if 0
      // XXX changes behaviour compared to old code!
-    g_stats.answers(0);
-    g_stats.ourtime(0);
+    t_Counters.at(rec::Counter::answers)(0);
+    t_Counters.at(rec::Counter::ourtime)(0);
  #endif
    }
  
@@ -1977,14 +1982,15 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
      double delta = (diff.tv_sec * 1000 + diff.tv_usec / 1000.0);
  
      if (delta > 1000.0) {
-      g_stats.tooOldDrops++;
+      t_Counters.at(rec::Counter::tooOldDrops)++;
        return nullptr;
      }
    }
  
-  ++g_stats.qcounter;
+  ++t_Counters.at(rec::Counter::qcounter);
+
    if (fromaddr.sin4.sin_family == AF_INET6)
-    g_stats.ipv6qcounter++;
+    t_Counters.at(rec::Counter::ipv6qcounter)++;
  
    string response;
    const dnsheader_aligned headerdata(question.data());
@@ -2142,7 +2148,8 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
          struct timeval now;
          Utility::gettimeofday(&now, nullptr);
          uint64_t spentUsec = uSec(now - tv);
-        g_stats.cumulativeAnswers(spentUsec);
+        t_Counters.at(rec::Histogram::cumulativeAnswers)(spentUsec);
+        t_Counters.updateSnap(g_regressionTestMode);
          return 0;
        }
      }
@@ -2162,7 +2169,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
          SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED question from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << " based on policy" << endl,
               g_slogudpin->info(Logr::Notice, "Dropped question based on policy", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
        }
-      g_stats.policyDrops++;
+      t_Counters.at(rec::Counter::policyDrops)++;
        return 0;
      }
    }
@@ -2174,7 +2181,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
               g_slogudpin->info(Logr::Notice, "Dropping UDP NOTIFY, zone not matched by allow-notify-for", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
        }
  
-      g_stats.zoneDisallowedNotify++;
+      t_Counters.at(rec::Counter::zoneDisallowedNotify)++;
        return 0;
      }
  
@@ -2197,7 +2204,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr
        SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED question from " << source.toStringWithPort() << (source != fromaddr ? " (via " + fromaddr.toStringWithPort() + ")" : "") << ", over capacity" << endl,
             g_slogudpin->info(Logr::Notice, "Dropped question, over capacity", "source", Logging::Loggable(source), "remote", Logging::Loggable(fromaddr)));
  
-    g_stats.overCapacityDrops++;
+    t_Counters.at(rec::Counter::overCapacityDrops)++;
      return 0;
    }
  
@@ -2270,7 +2277,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
        firstQuery = false;
  
        if (msgh.msg_flags & MSG_TRUNC) {
-        g_stats.truncatedDrops++;
+        t_Counters.at(rec::Counter::truncatedDrops)++;
          if (!g_quiet) {
            SLOG(g_log << Logger::Error << "Ignoring truncated query from " << fromaddr.toString() << endl,
                 g_slogudpin->info(Logr::Error, "Ignoring truncated query", "remote", Logging::Loggable(fromaddr)));
@@ -2284,7 +2291,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
          bool tcp;
          ssize_t used = parseProxyHeader(data, proxyProto, source, destination, tcp, proxyProtocolValues);
          if (used <= 0) {
-          ++g_stats.proxyProtocolInvalidCount;
+          ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
            if (!g_quiet) {
              SLOG(g_log << Logger::Error << "Ignoring invalid proxy protocol (" << std::to_string(len) << ", " << std::to_string(used) << ") query from " << fromaddr.toStringWithPort() << endl,
                   g_slogudpin->info(Logr::Error, "Ignoring invalid proxy protocol query", "length", Logging::Loggable(len),
@@ -2298,7 +2305,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                   g_slogudpin->info(Logr::Error, "Proxy protocol header in UDP packet  is larger than proxy-protocol-maximum-size",
                                     "used", Logging::Loggable(used), "remote", Logging::Loggable(fromaddr)));
            }
-          ++g_stats.proxyProtocolInvalidCount;
+          ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
            return;
          }
  
@@ -2306,7 +2313,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
        }
        else if (len > 512) {
          /* we only allow UDP packets larger than 512 for those with a proxy protocol header */
-        g_stats.truncatedDrops++;
+        t_Counters.at(rec::Counter::truncatedDrops)++;
          if (!g_quiet) {
            SLOG(g_log << Logger::Error << "Ignoring truncated query from " << fromaddr.toStringWithPort() << endl,
                 g_slogudpin->info(Logr::Error, "Ignoring truncated query", "remote", Logging::Loggable(fromaddr)));
@@ -2315,7 +2322,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
        }
  
        if (data.size() < sizeof(dnsheader)) {
-        g_stats.ignoredCount++;
+        t_Counters.at(rec::Counter::ignoredCount)++;
          if (!g_quiet) {
            SLOG(g_log << Logger::Error << "Ignoring too-short (" << std::to_string(data.size()) << ") query from " << fromaddr.toString() << endl,
                 g_slogudpin->info(Logr::Error, "Ignoring too-short query", "length", Logging::Loggable(data.size()),
@@ -2344,7 +2351,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                 g_slogudpin->info(Logr::Error, "Dropping UDP query, address not matched by allow-from", "source", Logging::Loggable(mappedSource)));
          }
  
-        g_stats.unauthorizedUDP++;
+        t_Counters.at(rec::Counter::unauthorizedUDP)++;
          return;
        }
  
@@ -2355,7 +2362,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                 g_slogudpin->info(Logr::Error, "Dropping UDP query can't deal with port 0", "remote", Logging::Loggable(fromaddr)));
          }
  
-        g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
+        t_Counters.at(rec::Counter::clientParseError)++; // not quite the best place to put it, but needs to go somewhere
          return;
        }
  
@@ -2364,21 +2371,21 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
          const dnsheader* dh = headerdata.get();
  
          if (dh->qr) {
-          g_stats.ignoredCount++;
+          t_Counters.at(rec::Counter::ignoredCount)++;
            if (g_logCommonErrors) {
              SLOG(g_log << Logger::Error << "Ignoring answer from " << fromaddr.toString() << " on server socket!" << endl,
                   g_slogudpin->info(Logr::Error, "Ignoring answer on server socket", "remote", Logging::Loggable(fromaddr)));
            }
          }
          else if (dh->opcode != Opcode::Query && dh->opcode != Opcode::Notify) {
-          g_stats.ignoredCount++;
+          t_Counters.at(rec::Counter::ignoredCount)++;
            if (g_logCommonErrors) {
              SLOG(g_log << Logger::Error << "Ignoring unsupported opcode " << Opcode::to_s(dh->opcode) << " from " << fromaddr.toString() << " on server socket!" << endl,
                   g_slogudpin->info(Logr::Error, "Ignoring unsupported opcode server socket", "remote", Logging::Loggable(fromaddr), "opcode", Logging::Loggable(Opcode::to_s(dh->opcode))));
            }
          }
          else if (dh->qdcount == 0) {
-          g_stats.emptyQueriesCount++;
+          t_Counters.at(rec::Counter::emptyQueriesCount)++;
            if (g_logCommonErrors) {
              SLOG(g_log << Logger::Error << "Ignoring empty (qdcount == 0) query from " << fromaddr.toString() << " on server socket!" << endl,
                   g_slogudpin->info(Logr::Error, "Ignoring empty (qdcount == 0) query on server socket!", "remote", Logging::Loggable(fromaddr)));
@@ -2393,7 +2400,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                                         "source", Logging::Loggable(mappedSource)));
                }
  
-              g_stats.sourceDisallowedNotify++;
+              t_Counters.at(rec::Counter::sourceDisallowedNotify)++;
                return;
              }
            }
@@ -2435,14 +2442,14 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
          }
        }
        catch (const MOADNSException& mde) {
-        g_stats.clientParseError++;
+        t_Counters.at(rec::Counter::clientParseError)++;
          if (g_logCommonErrors) {
            SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP client " << fromaddr.toString() << ": " << mde.what() << endl,
                 g_slogudpin->error(Logr::Error, mde.what(), "Unable to parse packet from remote UDP client", "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("MOADNSException")));
          }
        }
        catch (const std::runtime_error& e) {
-        g_stats.clientParseError++;
+        t_Counters.at(rec::Counter::clientParseError)++;
          if (g_logCommonErrors) {
            SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP client " << fromaddr.toString() << ": " << e.what() << endl,
                 g_slogudpin->error(Logr::Error, e.what(), "Unable to parse packet from remote UDP client", "remote", Logging::Loggable(fromaddr), "exception", Logging::Loggable("std::runtime_error")));
@@ -2452,12 +2459,13 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
      else {
        // cerr<<t_id<<" had error: "<<stringerror()<<endl;
        if (firstQuery && errno == EAGAIN) {
-        g_stats.noPacketError++;
+        t_Counters.at(rec::Counter::noPacketError)++;
        }
  
        break;
      }
    }
+  t_Counters.updateSnap(g_regressionTestMode);
  }
  
  void makeUDPServerSockets(deferredAdd_t& deferredAdds, Logr::log_t log)
@@ -2615,7 +2623,7 @@ static unsigned int selectWorker(unsigned int hash)
    unsigned int worker = hash % RecThreadInfo::numWorkers();
    /* at least one server has to be at or below the average load */
    if (load[worker] > targetLoad) {
-    ++g_stats.rebalancedQueries;
+    ++t_Counters.at(rec::Counter::rebalancedQueries);
      do {
        worker = (worker + 1) % RecThreadInfo::numWorkers();
      } while (load[worker] > targetLoad);
@@ -2637,7 +2645,7 @@ void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
    unsigned int hash = hashQuestion(reinterpret_cast<const uint8_t*>(packet.data()), packet.length(), g_disthashseed, ok);
    if (!ok) {
      // hashQuestion does detect invalid names, so we might as well punt here instead of in the worker thread
-    g_stats.ignoredCount++;
+    t_Counters.at(rec::Counter::ignoredCount)++;
      throw MOADNSException("too-short (" + std::to_string(packet.length()) + ") or invalid name");
    }
    unsigned int target = selectWorker(hash);
@@ -2655,7 +2663,7 @@ void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
      } while (newTarget == target);
  
      if (!trySendingQueryToWorker(newTarget, tmsg)) {
-      g_stats.queryPipeFullDrops++;
+      t_Counters.at(rec::Counter::queryPipeFullDrops)++;
        delete tmsg;
      }
    }
@@ -2675,7 +2683,7 @@ static void doResends(MT_t::waiters_t::iterator& iter, const std::shared_ptr<Pac
      r->fd = -1;
      r->id = *i;
      MT->sendEvent(r, &content);
-    g_stats.chainResends++;
+    t_Counters.at(rec::Counter::chainResends)++;
    }
  }
  
@@ -2694,7 +2702,7 @@ static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
      if (len < 0)
        ; //      cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
      else {
-      g_stats.serverParseError++;
+      t_Counters.at(rec::Counter::serverParseError)++;
        if (g_logCommonErrors)
          SLOG(g_log << Logger::Error << "Unable to parse packet from remote UDP server " << fromaddr.toString() << ": packet smaller than DNS header" << endl,
               g_slogout->info(Logr::Error, "Unable to parse packet from remote UDP server", "from", Logging::Loggable(fromaddr)));
@@ -2736,7 +2744,7 @@ static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
          pident->domain = DNSName(reinterpret_cast<const char*>(packet.data()), len, 12, false, &pident->type); // don't copy this from above - we need to do the actual read
      }
      catch (std::exception& e) {
-      g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
+      t_Counters.at(rec::Counter::serverParseError)++; // won't be fed to lwres.cc, so we have to increment
        SLOG(g_log << Logger::Warning << "Error in packet from remote nameserver " << fromaddr.toStringWithPort() << ": " << e.what() << endl,
             g_slogudpin->error(Logr::Warning, "Error in packet from remote nameserver", "from", Logging::Loggable(fromaddr)));
        return;
@@ -2770,7 +2778,7 @@ retryWithName:
          goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
        }
      }
-    g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
+    t_Counters.at(rec::Counter::unexpectedCount)++; // if we made it here, it really is an unexpected answer
      if (g_logCommonErrors) {
        SLOG(g_log << Logger::Warning << "Discarding unexpected packet from " << fromaddr.toStringWithPort() << ": " << (pident->domain.empty() ? "<empty>" : pident->domain.toString()) << ", " << pident->type << ", " << MT->d_waiters.size() << " waiters" << endl,
             g_slogudpin->info(Logr::Warning, "Discarding unexpected packet", "from", Logging::Loggable(fromaddr),
diff --git a/pdns/rec_channel_rec.cc b/pdns/rec_channel_rec.cc

index 52c93bf4189f1a21fd667b241644464edf2c4c3b..cae909fd026071aa28f4fbfaf2962257e1a2d178 100644 (file)
--- a/pdns/rec_channel_rec.cc
+++ b/pdns/rec_channel_rec.cc
@@ -1149,7 +1149,7 @@ static uint64_t doGetMallocated()
    return 0;
  }
  
-static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& histogram)
+static StatsMap toStatsMap(const string& name, const pdns::Histogram& histogram)
  {
    const auto& data = histogram.getCumulativeBuckets();
    const string pbasename = getPrometheusName(name);
@@ -1169,7 +1169,7 @@ static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& hist
    return entries;
  }
  
-static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& histogram4, const pdns::AtomicHistogram& histogram6)
+static StatsMap toStatsMap(const string& name, const pdns::Histogram& histogram4, const pdns::Histogram& histogram6)
  {
    const string pbasename = getPrometheusName(name);
    StatsMap entries;
@@ -1199,13 +1199,14 @@ static StatsMap toStatsMap(const string& name, const pdns::AtomicHistogram& hist
    return entries;
  }
  
-static StatsMap toAuthRCodeStatsMap(const string& name, const std::array<pdns::stat_t, 16>& v)
+static StatsMap toAuthRCodeStatsMap(const string& name)
  {
    const string pbasename = getPrometheusName(name);
    StatsMap entries;
  
    uint8_t n = 0;
-  for (const auto& entry : v) {
+  auto rcodes = g_Counters.sum(rec::RCode::auth).rcodeCounters;
+  for (const auto& entry : rcodes) {
      const auto key = RCode::to_short_s(n);
      std::string pname = pbasename + "{rcode=\"" + key + "\"}";
      entries.emplace("auth-" + key + "-answers", StatsMapEntry{pname, std::to_string(entry)});
@@ -1313,9 +1314,9 @@ static StatsMap toRemoteLoggerStatsMap(const string& name)
  
  static void registerAllStats1()
  {
-  addGetStat("questions", &g_stats.qcounter);
-  addGetStat("ipv6-questions", &g_stats.ipv6qcounter);
-  addGetStat("tcp-questions", &g_stats.tcpqcounter);
+  addGetStat("questions", [] { return g_Counters.sum(rec::Counter::qcounter); });
+  addGetStat("ipv6-questions", [] { return g_Counters.sum(rec::Counter::ipv6qcounter); });
+  addGetStat("tcp-questions", [] { return g_Counters.sum(rec::Counter::tcpqcounter); });
  
    addGetStat("cache-hits", doGetCacheHits);
    addGetStat("cache-misses", doGetCacheMisses);
@@ -1339,63 +1340,63 @@ static void registerAllStats1()
  
    addGetStat("malloc-bytes", doGetMallocated);
  
-  addGetStat("servfail-answers", &g_stats.servFails);
-  addGetStat("nxdomain-answers", &g_stats.nxDomains);
-  addGetStat("noerror-answers", &g_stats.noErrors);
-
-  addGetStat("unauthorized-udp", &g_stats.unauthorizedUDP);
-  addGetStat("unauthorized-tcp", &g_stats.unauthorizedTCP);
-  addGetStat("source-disallowed-notify", &g_stats.sourceDisallowedNotify);
-  addGetStat("zone-disallowed-notify", &g_stats.zoneDisallowedNotify);
-  addGetStat("tcp-client-overflow", &g_stats.tcpClientOverflow);
-
-  addGetStat("client-parse-errors", &g_stats.clientParseError);
-  addGetStat("server-parse-errors", &g_stats.serverParseError);
-  addGetStat("too-old-drops", &g_stats.tooOldDrops);
-  addGetStat("truncated-drops", &g_stats.truncatedDrops);
-  addGetStat("query-pipe-full-drops", &g_stats.queryPipeFullDrops);
-
-  addGetStat("answers0-1", []() { return g_stats.answers.getCount(0); });
-  addGetStat("answers1-10", []() { return g_stats.answers.getCount(1); });
-  addGetStat("answers10-100", []() { return g_stats.answers.getCount(2); });
-  addGetStat("answers100-1000", []() { return g_stats.answers.getCount(3); });
-  addGetStat("answers-slow", []() { return g_stats.answers.getCount(4); });
-
-  addGetStat("x-ourtime0-1", []() { return g_stats.ourtime.getCount(0); });
-  addGetStat("x-ourtime1-2", []() { return g_stats.ourtime.getCount(1); });
-  addGetStat("x-ourtime2-4", []() { return g_stats.ourtime.getCount(2); });
-  addGetStat("x-ourtime4-8", []() { return g_stats.ourtime.getCount(3); });
-  addGetStat("x-ourtime8-16", []() { return g_stats.ourtime.getCount(4); });
-  addGetStat("x-ourtime16-32", []() { return g_stats.ourtime.getCount(5); });
-  addGetStat("x-ourtime-slow", []() { return g_stats.ourtime.getCount(6); });
-
-  addGetStat("auth4-answers0-1", []() { return g_stats.auth4Answers.getCount(0); });
-  addGetStat("auth4-answers1-10", []() { return g_stats.auth4Answers.getCount(1); });
-  addGetStat("auth4-answers10-100", []() { return g_stats.auth4Answers.getCount(2); });
-  addGetStat("auth4-answers100-1000", []() { return g_stats.auth4Answers.getCount(3); });
-  addGetStat("auth4-answers-slow", []() { return g_stats.auth4Answers.getCount(4); });
-
-  addGetStat("auth6-answers0-1", []() { return g_stats.auth6Answers.getCount(0); });
-  addGetStat("auth6-answers1-10", []() { return g_stats.auth6Answers.getCount(1); });
-  addGetStat("auth6-answers10-100", []() { return g_stats.auth6Answers.getCount(2); });
-  addGetStat("auth6-answers100-1000", []() { return g_stats.auth6Answers.getCount(3); });
-  addGetStat("auth6-answers-slow", []() { return g_stats.auth6Answers.getCount(4); });
-
-  addGetStat("qa-latency", []() { return round(g_stats.avgLatencyUsec.load()); });
-  addGetStat("x-our-latency", []() { return round(g_stats.avgLatencyOursUsec.load()); });
-  addGetStat("unexpected-packets", &g_stats.unexpectedCount);
-  addGetStat("case-mismatches", &g_stats.caseMismatchCount);
-  addGetStat("spoof-prevents", &g_stats.spoofCount);
-
-  addGetStat("nsset-invalidations", &g_stats.nsSetInvalidations);
-
-  addGetStat("resource-limits", &g_stats.resourceLimits);
-  addGetStat("over-capacity-drops", &g_stats.overCapacityDrops);
-  addGetStat("policy-drops", &g_stats.policyDrops);
-  addGetStat("no-packet-error", &g_stats.noPacketError);
-  addGetStat("ignored-packets", &g_stats.ignoredCount);
-  addGetStat("empty-queries", &g_stats.emptyQueriesCount);
-  addGetStat("max-mthread-stack", &g_stats.maxMThreadStackUsage);
+  addGetStat("servfail-answers", [] { return g_Counters.sum(rec::Counter::servFails); });
+  addGetStat("nxdomain-answers", [] { return g_Counters.sum(rec::Counter::nxDomains); });
+  addGetStat("noerror-answers", [] { return g_Counters.sum(rec::Counter::noErrors); });
+
+  addGetStat("unauthorized-udp", [] { return g_Counters.sum(rec::Counter::unauthorizedUDP); });
+  addGetStat("unauthorized-tcp", [] { return g_Counters.sum(rec::Counter::unauthorizedTCP); });
+  addGetStat("source-disallowed-notify", [] { return g_Counters.sum(rec::Counter::sourceDisallowedNotify); });
+  addGetStat("zone-disallowed-notify", [] { return g_Counters.sum(rec::Counter::zoneDisallowedNotify); });
+  addGetStat("tcp-client-overflow", [] { return g_Counters.sum(rec::Counter::tcpClientOverflow); });
+
+  addGetStat("client-parse-errors", [] { return g_Counters.sum(rec::Counter::clientParseError); });
+  addGetStat("server-parse-errors", [] { return g_Counters.sum(rec::Counter::serverParseError); });
+  addGetStat("too-old-drops", [] { return g_Counters.sum(rec::Counter::tooOldDrops); });
+  addGetStat("truncated-drops", [] { return g_Counters.sum(rec::Counter::truncatedDrops); });
+  addGetStat("query-pipe-full-drops", [] { return g_Counters.sum(rec::Counter::queryPipeFullDrops); });
+
+  addGetStat("answers0-1", []() { return g_Counters.sum(rec::Histogram::answers).getCount(0); });
+  addGetStat("answers1-10", []() { return g_Counters.sum(rec::Histogram::answers).getCount(1); });
+  addGetStat("answers10-100", []() { return g_Counters.sum(rec::Histogram::answers).getCount(2); });
+  addGetStat("answers100-1000", []() { return g_Counters.sum(rec::Histogram::answers).getCount(3); });
+  addGetStat("answers-slow", []() { return g_Counters.sum(rec::Histogram::answers).getCount(4); });
+
+  addGetStat("x-ourtime0-1", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(0); });
+  addGetStat("x-ourtime1-2", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(1); });
+  addGetStat("x-ourtime2-4", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(2); });
+  addGetStat("x-ourtime4-8", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(3); });
+  addGetStat("x-ourtime8-16", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(4); });
+  addGetStat("x-ourtime16-32", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(5); });
+  addGetStat("x-ourtime-slow", []() { return g_Counters.sum(rec::Histogram::ourtime).getCount(6); });
+
+  addGetStat("auth4-answers0-1", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(0); });
+  addGetStat("auth4-answers1-10", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(1); });
+  addGetStat("auth4-answers10-100", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(2); });
+  addGetStat("auth4-answers100-1000", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(3); });
+  addGetStat("auth4-answers-slow", []() { return g_Counters.sum(rec::Histogram::auth4Answers).getCount(4); });
+
+  addGetStat("auth6-answers0-1", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(0); });
+  addGetStat("auth6-answers1-10", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(1); });
+  addGetStat("auth6-answers10-100", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(2); });
+  addGetStat("auth6-answers100-1000", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(3); });
+  addGetStat("auth6-answers-slow", []() { return g_Counters.sum(rec::Histogram::auth6Answers).getCount(4); });
+
+  addGetStat("qa-latency", []() { return round(g_Counters.avg(rec::DoubleWAvgCounter::avgLatencyUsec)); });
+  addGetStat("x-our-latency", []() { return round(g_Counters.avg(rec::DoubleWAvgCounter::avgLatencyOursUsec)); });
+  addGetStat("unexpected-packets", [] { return g_Counters.sum(rec::Counter::unexpectedCount); });
+  addGetStat("case-mismatches", [] { return g_Counters.sum(rec::Counter::caseMismatchCount); });
+  addGetStat("spoof-prevents", [] { return g_Counters.sum(rec::Counter::spoofCount); });
+
+  addGetStat("nsset-invalidations", [] { return g_Counters.sum(rec::Counter::nsSetInvalidations); });
+
+  addGetStat("resource-limits", [] { return g_Counters.sum(rec::Counter::resourceLimits); });
+  addGetStat("over-capacity-drops", [] { return g_Counters.sum(rec::Counter::overCapacityDrops); });
+  addGetStat("policy-drops", [] { return g_Counters.sum(rec::Counter::policyDrops); });
+  addGetStat("no-packet-error", [] { return g_Counters.sum(rec::Counter::noPacketError); });
+  addGetStat("ignored-packets", [] { return g_Counters.sum(rec::Counter::ignoredCount); });
+  addGetStat("empty-queries", [] { return g_Counters.sum(rec::Counter::emptyQueriesCount); });
+  addGetStat("max-mthread-stack", [] { return g_Counters.max(rec::Counter::maxMThreadStackUsage); });
  
    addGetStat("negcache-entries", getNegCacheSize);
    addGetStat("throttle-entries", SyncRes::getThrottledServersSize);
@@ -1406,22 +1407,22 @@ static void registerAllStats1()
  
    addGetStat("concurrent-queries", getConcurrentQueries);
    addGetStat("security-status", &g_security_status);
-  addGetStat("outgoing-timeouts", &SyncRes::s_outgoingtimeouts);
-  addGetStat("outgoing4-timeouts", &SyncRes::s_outgoing4timeouts);
-  addGetStat("outgoing6-timeouts", &SyncRes::s_outgoing6timeouts);
-  addGetStat("auth-zone-queries", &SyncRes::s_authzonequeries);
-  addGetStat("tcp-outqueries", &SyncRes::s_tcpoutqueries);
-  addGetStat("dot-outqueries", &SyncRes::s_dotoutqueries);
-  addGetStat("all-outqueries", &SyncRes::s_outqueries);
-  addGetStat("ipv6-outqueries", &g_stats.ipv6queries);
-  addGetStat("throttled-outqueries", &SyncRes::s_throttledqueries);
-  addGetStat("dont-outqueries", &SyncRes::s_dontqueries);
-  addGetStat("qname-min-fallback-success", &SyncRes::s_qnameminfallbacksuccess);
-  addGetStat("throttled-out", &SyncRes::s_throttledqueries);
-  addGetStat("unreachables", &SyncRes::s_unreachables);
+  addGetStat("outgoing-timeouts", [] { return g_Counters.sum(rec::Counter::outgoingtimeouts); });
+  addGetStat("outgoing4-timeouts", [] { return g_Counters.sum(rec::Counter::outgoing4timeouts); });
+  addGetStat("outgoing6-timeouts", [] { return g_Counters.sum(rec::Counter::outgoing6timeouts); });
+  addGetStat("auth-zone-queries", [] { return g_Counters.sum(rec::Counter::authzonequeries); });
+  addGetStat("tcp-outqueries", [] { return g_Counters.sum(rec::Counter::tcpoutqueries); });
+  addGetStat("dot-outqueries", [] { return g_Counters.sum(rec::Counter::dotoutqueries); });
+  addGetStat("all-outqueries", [] { return g_Counters.sum(rec::Counter::outqueries); });
+  addGetStat("ipv6-outqueries", [] { return g_Counters.sum(rec::Counter::ipv6queries); });
+  addGetStat("throttled-outqueries", [] { return g_Counters.sum(rec::Counter::throttledqueries); });
+  addGetStat("dont-outqueries", [] { return g_Counters.sum(rec::Counter::dontqueries); });
+  addGetStat("qname-min-fallback-success", [] { return g_Counters.sum(rec::Counter::qnameminfallbacksuccess); });
+  addGetStat("throttled-out", [] { return g_Counters.sum(rec::Counter::throttledqueries); });
+  addGetStat("unreachables", [] { return g_Counters.sum(rec::Counter::unreachables); });
    addGetStat("ecs-queries", &SyncRes::s_ecsqueries);
    addGetStat("ecs-responses", &SyncRes::s_ecsresponses);
-  addGetStat("chain-resends", &g_stats.chainResends);
+  addGetStat("chain-resends", [] { return g_Counters.sum(rec::Counter::chainResends); });
    addGetStat("tcp-clients", [] { return TCPConnection::getCurrentConnections(); });
  
  #ifdef __linux__
@@ -1437,17 +1438,17 @@ static void registerAllStats1()
    addGetStat("udp6-in-csum-errors", [] { return udp6ErrorStats("udp6-in-csum-errors"); });
  #endif
  
-  addGetStat("edns-ping-matches", &g_stats.ednsPingMatches);
-  addGetStat("edns-ping-mismatches", &g_stats.ednsPingMismatches);
-  addGetStat("dnssec-queries", &g_stats.dnssecQueries);
+  addGetStat("edns-ping-matches", [] { return g_Counters.sum(rec::Counter::ednsPingMatches); });
+  addGetStat("edns-ping-mismatches", [] { return g_Counters.sum(rec::Counter::ednsPingMismatches); });
+  addGetStat("dnssec-queries", [] { return g_Counters.sum(rec::Counter::dnssecQueries); });
  
-  addGetStat("dnssec-authentic-data-queries", &g_stats.dnssecAuthenticDataQueries);
-  addGetStat("dnssec-check-disabled-queries", &g_stats.dnssecCheckDisabledQueries);
+  addGetStat("dnssec-authentic-data-queries", [] { return g_Counters.sum(rec::Counter::dnssecAuthenticDataQueries); });
+  addGetStat("dnssec-check-disabled-queries", [] { return g_Counters.sum(rec::Counter::dnssecCheckDisabledQueries); });
  
-  addGetStat("variable-responses", &g_stats.variableResponses);
+  addGetStat("variable-responses", [] { return g_Counters.sum(rec::Counter::variableResponses); });
  
-  addGetStat("noping-outqueries", &g_stats.noPingOutQueries);
-  addGetStat("noedns-outqueries", &g_stats.noEdnsOutQueries);
+  addGetStat("noping-outqueries", [] { return g_Counters.sum(rec::Counter::noPingOutQueries); });
+  addGetStat("noedns-outqueries", [] { return g_Counters.sum(rec::Counter::noEdnsOutQueries); });
  
    addGetStat("uptime", calculateUptime);
    addGetStat("real-memory-usage", [] { return getRealMemoryUsage(string()); });
@@ -1471,7 +1472,7 @@ static void registerAllStats1()
    addGetStat("memory-allocated", [] { return g_mtracer->getTotAllocated(string()); });
  #endif
  
-  addGetStat("dnssec-validations", &g_stats.dnssecValidations);
+  addGetStat("dnssec-validations", [] { return g_Counters.sum(rec::Counter::dnssecValidations); });
    addGetStat("dnssec-result-insecure", &g_stats.dnssecResults[vState::Insecure]);
    addGetStat("dnssec-result-secure", &g_stats.dnssecResults[vState::Secure]);
    addGetStat("dnssec-result-bogus", []() {
@@ -1538,17 +1539,17 @@ static void registerAllStats1()
    addGetStat("policy-result-truncate", &g_stats.policyResults[DNSFilterEngine::PolicyKind::Truncate]);
    addGetStat("policy-result-custom", &g_stats.policyResults[DNSFilterEngine::PolicyKind::Custom]);
  
-  addGetStat("rebalanced-queries", &g_stats.rebalancedQueries);
+  addGetStat("rebalanced-queries", [] { return g_Counters.sum(rec::Counter::rebalancedQueries); });
  
-  addGetStat("proxy-protocol-invalid", &g_stats.proxyProtocolInvalidCount);
+  addGetStat("proxy-protocol-invalid", [] { return g_Counters.sum(rec::Counter::proxyProtocolInvalidCount); });
  
-  addGetStat("nod-lookups-dropped-oversize", &g_stats.nodLookupsDroppedOversize);
+  addGetStat("nod-lookups-dropped-oversize", [] { return g_Counters.sum(rec::Counter::nodLookupsDroppedOversize); });
  
    addGetStat("taskqueue-pushed", []() { return getTaskPushes(); });
    addGetStat("taskqueue-expired", []() { return getTaskExpired(); });
    addGetStat("taskqueue-size", []() { return getTaskSize(); });
  
-  addGetStat("dns64-prefix-answers", &g_stats.dns64prefixanswers);
+  addGetStat("dns64-prefix-answers", [] { return g_Counters.sum(rec::Counter::dns64prefixanswers); });
  
    addGetStat("almost-expired-pushed", []() { return getAlmostExpiredTasksPushed(); });
    addGetStat("almost-expired-run", []() { return getAlmostExpiredTasksRun(); });
@@ -1556,8 +1557,8 @@ static void registerAllStats1()
  
    addGetStat("idle-tcpout-connections", getCurrentIdleTCPConnections);
  
-  addGetStat("maintenance-usec", &g_stats.maintenanceUsec);
-  addGetStat("maintenance-calls", &g_stats.maintenanceCalls);
+  addGetStat("maintenance-usec", [] { return g_Counters.sum(rec::Counter::maintenanceUsec); });
+  addGetStat("maintenance-calls", [] { return g_Counters.sum(rec::Counter::maintenanceCalls); });
  
    /* make sure that the ECS stats are properly initialized */
    SyncRes::clearECSStats();
@@ -1571,10 +1572,10 @@ static void registerAllStats1()
    }
  
    addGetStat("cumul-clientanswers", []() {
-    return toStatsMap(g_stats.cumulativeAnswers.getName(), g_stats.cumulativeAnswers);
+    return toStatsMap(t_Counters.at(rec::Histogram::cumulativeAnswers).getName(), g_Counters.sum(rec::Histogram::cumulativeAnswers));
    });
    addGetStat("cumul-authanswers", []() {
-    return toStatsMap(g_stats.cumulativeAuth4Answers.getName(), g_stats.cumulativeAuth4Answers, g_stats.cumulativeAuth6Answers);
+    return toStatsMap(t_Counters.at(rec::Histogram::cumulativeAuth4Answers).getName(), g_Counters.sum(rec::Histogram::cumulativeAuth4Answers), g_Counters.sum(rec::Histogram::cumulativeAuth6Answers));
    });
    addGetStat("policy-hits", []() {
      return toRPZStatsMap("policy-hits", g_stats.policyHits);
@@ -1583,7 +1584,7 @@ static void registerAllStats1()
      return toProxyMappingStatsMap("proxy-mapping-total");
    });
    addGetStat("auth-rcode-answers", []() {
-    return toAuthRCodeStatsMap("auth-rcode-answers", g_stats.authRCode);
+    return toAuthRCodeStatsMap("auth-rcode-answers");
    });
    addGetStat("remote-logger-count", []() {
      return toRemoteLoggerStatsMap("remote-logger-count");
diff --git a/pdns/recursordist/Makefile.am b/pdns/recursordist/Makefile.am

index 629976e0d15aeb025b53172422ab19d2e81e8838..0371e08c3f22b74c632cab42c9d345659a326309 100644 (file)
--- a/pdns/recursordist/Makefile.am
+++ b/pdns/recursordist/Makefile.am
@@ -171,6 +171,7 @@ pdns_recursor_SOURCES = \
         rec-protozero.cc rec-protozero.hh \
         rec-snmp.hh rec-snmp.cc \
         rec-taskqueue.cc rec-taskqueue.hh \
+       rec-tcounters.cc rec-tcounters.hh \
         rec-tcp.cc \
         rec-tcpout.cc rec-tcpout.hh \
         rec-zonetocache.cc rec-zonetocache.hh \
@@ -201,6 +202,7 @@ pdns_recursor_SOURCES = \
         svc-records.cc svc-records.hh \
         syncres.cc syncres.hh \
         taskqueue.cc taskqueue.hh \
+       tcounters.hh \
         tcpiohandler.cc tcpiohandler.hh \
         threadname.hh threadname.cc \
         tsigverifier.cc tsigverifier.hh \
@@ -294,6 +296,7 @@ testrunner_SOURCES = \
         rcpgenerator.cc \
         rec-eventtrace.cc rec-eventtrace.hh \
         rec-taskqueue.cc rec-taskqueue.hh \
+       rec-tcounters.cc rec-tcounters.hh \
         rec-zonetocache.cc rec-zonetocache.hh \
         recpacketcache.cc recpacketcache.hh \
         recursor_cache.cc recursor_cache.hh \
@@ -334,6 +337,7 @@ testrunner_SOURCES = \
         test-packetcache_hh.cc \
         test-rcpgenerator_cc.cc \
         test-rec-taskqueue.cc \
+       test-rec-tcounters_cc.cc \
         test-rec-zonetocache.cc \
         test-recpacketcache_cc.cc \
         test-recursorcache_cc.cc \
diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc

index 415b7c83386a4067e385046856fce8c189823d45..a74515ff5783db076c811093243a5caa1844872b 100644 (file)
--- a/pdns/recursordist/rec-main.cc
+++ b/pdns/recursordist/rec-main.cc
@@ -62,6 +62,7 @@ thread_local FrameStreamServersInfo t_nodFrameStreamServersInfo;
  string g_programname = "pdns_recursor";
  string g_pidfname;
  RecursorControlChannel g_rcc; // only active in the handler thread
+bool g_regressionTestMode;
  
  #ifdef NOD_ENABLED
  bool g_nodEnabled;
@@ -997,9 +998,16 @@ static void doStats(void)
  
    auto log = g_slog->withName("stats");
  
-  if (g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
+  auto qcounter = g_Counters.sum(rec::Counter::qcounter);
+  auto syncresqueries = g_Counters.sum(rec::Counter::syncresqueries);
+  auto outqueries = g_Counters.sum(rec::Counter::outqueries);
+  auto throttledqueries = g_Counters.sum(rec::Counter::throttledqueries);
+  auto tcpoutqueries = g_Counters.sum(rec::Counter::tcpoutqueries);
+  auto dotoutqueries = g_Counters.sum(rec::Counter::dotoutqueries);
+  auto outgoingtimeouts = g_Counters.sum(rec::Counter::outgoingtimeouts);
+  if (qcounter > 0 && (cacheHits + cacheMisses) > 0 && syncresqueries > 0 && outqueries > 0) {
      if (!g_slogStructured) {
-      g_log << Logger::Notice << "stats: " << g_stats.qcounter << " questions, " << cacheSize << " cache entries, " << negCacheSize << " negative entries, " << ratePercentage(cacheHits, cacheHits + cacheMisses) << "% cache hits" << endl;
+      g_log << Logger::Notice << "stats: " << qcounter << " questions, " << cacheSize << " cache entries, " << negCacheSize << " negative entries, " << ratePercentage(cacheHits, cacheHits + cacheMisses) << "% cache hits" << endl;
        g_log << Logger::Notice << "stats: cache contended/acquired " << rc_stats.first << '/' << rc_stats.second << " = " << r << '%' << endl;
  
        g_log << Logger::Notice << "stats: throttle map: "
@@ -1010,18 +1018,18 @@ static void doStats(void)
              << SyncRes::getNonResolvingNSSize() << ", saved-parentsets: "
              << SyncRes::getSaveParentsNSSetsSize()
              << endl;
-      g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries) << "%";
-      g_log << Logger::Notice << ", " << ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries) << "% throttled" << endl;
-      g_log << Logger::Notice << "stats: " << SyncRes::s_tcpoutqueries << "/" << SyncRes::s_dotoutqueries << "/" << getCurrentIdleTCPConnections() << " outgoing tcp/dot/idle connections, " << broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries) << " queries running, " << SyncRes::s_outgoingtimeouts << " outgoing timeouts " << endl;
+      g_log << Logger::Notice << "stats: outpacket/query ratio " << ratePercentage(outqueries, syncresqueries) << "%";
+      g_log << Logger::Notice << ", " << ratePercentage(throttledqueries, outqueries + throttledqueries) << "% throttled" << endl;
+      g_log << Logger::Notice << "stats: " << tcpoutqueries << "/" << dotoutqueries << "/" << getCurrentIdleTCPConnections() << " outgoing tcp/dot/idle connections, " << broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries) << " queries running, " << outgoingtimeouts << " outgoing timeouts " << endl;
  
-      g_log << Logger::Notice << "stats: " << pcSize << " packet cache entries, " << ratePercentage(pcHits, g_stats.qcounter) << "% packet cache hits" << endl;
+      g_log << Logger::Notice << "stats: " << pcSize << " packet cache entries, " << ratePercentage(pcHits, qcounter) << "% packet cache hits" << endl;
  
        g_log << Logger::Notice << "stats: tasks pushed/expired/queuesize: " << taskPushes << '/' << taskExpired << '/' << taskSize << endl;
      }
      else {
        const string m = "Periodic statistics report";
        log->info(Logr::Info, m,
-                "questions", Logging::Loggable(g_stats.qcounter),
+                "questions", Logging::Loggable(qcounter),
                  "cache-entries", Logging::Loggable(cacheSize),
                  "negcache-entries", Logging::Loggable(negCacheSize),
                  "record-cache-hitratio-perc", Logging::Loggable(ratePercentage(cacheHits, cacheHits + cacheMisses)),
@@ -1035,17 +1043,17 @@ static void doStats(void)
                  "edns-entries", Logging::Loggable(SyncRes::getEDNSStatusesSize()),
                  "non-resolving-nameserver-entries", Logging::Loggable(SyncRes::getNonResolvingNSSize()),
                  "saved-parent-ns-sets-entries", Logging::Loggable(SyncRes::getSaveParentsNSSetsSize()),
-                "outqueries-per-query", Logging::Loggable(ratePercentage(SyncRes::s_outqueries, SyncRes::s_queries)));
+                "outqueries-per-query", Logging::Loggable(ratePercentage(outqueries, syncresqueries)));
        log->info(Logr::Info, m,
-                "throttled-queries-perc", Logging::Loggable(ratePercentage(SyncRes::s_throttledqueries, SyncRes::s_outqueries + SyncRes::s_throttledqueries)),
-                "tcp-outqueries", Logging::Loggable(SyncRes::s_tcpoutqueries),
-                "dot-outqueries", Logging::Loggable(SyncRes::s_dotoutqueries),
+                "throttled-queries-perc", Logging::Loggable(ratePercentage(throttledqueries, outqueries + throttledqueries)),
+                "tcp-outqueries", Logging::Loggable(tcpoutqueries),
+                "dot-outqueries", Logging::Loggable(dotoutqueries),
                  "idle-tcpout-connections", Logging::Loggable(getCurrentIdleTCPConnections()),
                  "concurrent-queries", Logging::Loggable(broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)),
-                "outgoing-timeouts", Logging::Loggable(SyncRes::s_outgoingtimeouts));
+                "outgoing-timeouts", Logging::Loggable(outgoingtimeouts));
        log->info(Logr::Info, m,
                  "packetcache-entries", Logging::Loggable(pcSize),
-                "packetcache-hitratio-perc", Logging::Loggable(ratePercentage(pcHits, g_stats.qcounter)),
+                "packetcache-hitratio-perc", Logging::Loggable(ratePercentage(pcHits, qcounter)),
                  "taskqueue-pushed", Logging::Loggable(taskPushes),
                  "taskqueue-expired", Logging::Loggable(taskExpired),
                  "taskqueue-size", Logging::Loggable(taskSize));
@@ -1060,12 +1068,12 @@ static void doStats(void)
      }
      time_t now = time(0);
      if (lastOutputTime && lastQueryCount && now != lastOutputTime) {
-      SLOG(g_log << Logger::Notice << "stats: " << (g_stats.qcounter - lastQueryCount) / (now - lastOutputTime) << " qps (average over " << (now - lastOutputTime) << " seconds)" << endl,
-           log->info(Logr::Info, "Periodic QPS report", "qps", Logging::Loggable((g_stats.qcounter - lastQueryCount) / (now - lastOutputTime)),
+      SLOG(g_log << Logger::Notice << "stats: " << (qcounter - lastQueryCount) / (now - lastOutputTime) << " qps (average over " << (now - lastOutputTime) << " seconds)" << endl,
+           log->info(Logr::Info, "Periodic QPS report", "qps", Logging::Loggable((qcounter - lastQueryCount) / (now - lastOutputTime)),
                       "averagedOver", Logging::Loggable(now - lastOutputTime)));
      }
      lastOutputTime = now;
-    lastQueryCount = g_stats.qcounter;
+    lastQueryCount = qcounter;
    }
    else if (statsWanted) {
      SLOG(g_log << Logger::Notice << "stats: no stats yet!" << endl,
@@ -1354,6 +1362,7 @@ static int serviceMain(int argc, char* argv[], Logr::log_t log)
    g_log.setName(g_programname);
    g_log.disableSyslog(::arg().mustDo("disable-syslog"));
    g_log.setTimestamps(::arg().mustDo("log-timestamp"));
+  g_regressionTestMode = ::arg().mustDo("devonly-regression-test-mode");
  
    if (!::arg()["logging-facility"].empty()) {
      int val = logFacilityToLOG(::arg().asNum("logging-facility"));
@@ -1950,6 +1959,9 @@ static int serviceMain(int argc, char* argv[], Logr::log_t log)
    disableStats(StatComponent::RecControl, ::arg()["stats-rec-control-disabled-list"]);
    disableStats(StatComponent::SNMP, ::arg()["stats-snmp-disabled-list"]);
  
+  // Run before any thread doing stats related things
+  registerAllStats();
+
    if (::arg().mustDo("snmp-agent")) {
  #ifdef HAVE_NET_SNMP
      string setting = ::arg()["snmp-daemon-socket"];
@@ -2120,6 +2132,7 @@ static void houseKeeping(void*)
  
      struct timeval now;
      Utility::gettimeofday(&now);
+    t_Counters.updateSnap(now, g_regressionTestMode);
  
      // Below are the tasks that run for every recursorThread, including handler and taskThread
      if (t_packetCache) {
@@ -2225,7 +2238,10 @@ static void houseKeeping(void*)
        const unsigned int minRootRefreshInterval = 10;
        static PeriodicTask rootUpdateTask{"rootUpdateTask", std::max(SyncRes::s_maxcachettl * 8 / 10, minRootRefreshInterval)};
        rootUpdateTask.runIfDue(now, [now, &log, minRootRefreshInterval]() {
-        int res = SyncRes::getRootNS(now, nullptr, 0, log);
+        int res = 0;
+        if (!g_regressionTestMode) {
+          res = SyncRes::getRootNS(now, nullptr, 0, log);
+        }
          if (res == 0) {
            // Success, go back to the defaut period
            rootUpdateTask.setPeriod(std::max(SyncRes::s_maxcachettl * 8 / 10, minRootRefreshInterval));
@@ -2292,6 +2308,7 @@ static void houseKeeping(void*)
          }
        });
      }
+    t_Counters.updateSnap(g_regressionTestMode);
      t_running = false;
    }
    catch (const PDNSException& ae) {
@@ -2311,6 +2328,7 @@ static void houseKeeping(void*)
  static void recursorThread()
  {
    auto log = g_slog->withName("runtime");
+  t_Counters.updateSnap(true);
    try {
      auto& threadInfo = RecThreadInfo::self();
      {
@@ -2443,8 +2461,6 @@ static void recursorThread()
        }
      }
  
-    registerAllStats();
-
      if (threadInfo.isHandler()) {
        t_fdm->addReadFD(g_rcc.d_fd, handleRCC); // control channel
      }
@@ -2457,7 +2473,6 @@ static void recursorThread()
      time_t last_carbon = 0, last_lua_maintenance = 0;
      time_t carbonInterval = ::arg().asNum("carbon-interval");
      time_t luaMaintenanceInterval = ::arg().asNum("lua-maintenance-interval");
-    s_counter.store(0); // used to periodically execute certain tasks
  
  #ifdef HAVE_SYSTEMD
      if (threadInfo.isHandler()) {
@@ -2467,7 +2482,6 @@ static void recursorThread()
        sd_notify(0, "READY=1");
      }
  #endif
-
      while (!RecursorControlChannel::stop) {
        while (MT->schedule(&g_now))
          ; // MTasker letting the mthreads do their thing
@@ -2481,8 +2495,8 @@ static void recursorThread()
          if (!threadInfo.isTaskThread()) {
            struct timeval stop;
            Utility::gettimeofday(&stop);
-          g_stats.maintenanceUsec += uSec(stop - start);
-          ++g_stats.maintenanceCalls;
+          t_Counters.at(rec::Counter::maintenanceUsec) += uSec(stop - start);
+          ++t_Counters.at(rec::Counter::maintenanceCalls);
          }
        }
  
@@ -2526,8 +2540,8 @@ static void recursorThread()
              last_lua_maintenance = g_now.tv_sec;
              struct timeval stop;
              Utility::gettimeofday(&stop);
-            g_stats.maintenanceUsec += uSec(stop - start);
-            ++g_stats.maintenanceCalls;
+            t_Counters.at(rec::Counter::maintenanceUsec) += uSec(stop - start);
+            ++t_Counters.at(rec::Counter::maintenanceCalls);
            }
          }
        }
@@ -2587,6 +2601,8 @@ int main(int argc, char** argv)
  #else
      ::arg().set("stack-size", "stack size per mthread") = "200000";
  #endif
+    // This mode forces metrics snap updates and dsiable root-refresh, to get consistent counters
+    ::arg().setSwitch("devonly-regression-test-mode", "internal use only") = "no";
      ::arg().set("soa-minimum-ttl", "Don't change") = "0";
      ::arg().set("no-shuffle", "Don't change") = "off";
      ::arg().set("local-port", "port to listen on") = "53";
diff --git a/pdns/recursordist/rec-main.hh b/pdns/recursordist/rec-main.hh

index 6c231f7fbe783661b37fe02df2a069e457952c1c..cb3787a8aea11fa7340ef2f2a1eff49d70e065ad 100644 (file)
--- a/pdns/recursordist/rec-main.hh
+++ b/pdns/recursordist/rec-main.hh
@@ -154,6 +154,7 @@ struct DNSComboWriter
  extern thread_local unique_ptr<FDMultiplexer> t_fdm;
  extern uint16_t g_minUdpSourcePort;
  extern uint16_t g_maxUdpSourcePort;
+extern bool g_regressionTestMode;
  
  // you can ask this class for a UDP socket to send a query from
  // this socket is not yours, don't even think about deleting it
diff --git a/pdns/recursordist/rec-tcounters.cc b/pdns/recursordist/rec-tcounters.cc

new file mode 100644 (file)

index 0000000..b40815a
--- /dev/null
+++ b/pdns/recursordist/rec-tcounters.cc
@@ -0,0 +1,156 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "rec-tcounters.hh"
+
+#include <sstream>
+
+namespace rec
+{
+
+Counters& Counters::merge(const Counters& data)
+{
+  // Counters are simply added
+  for (size_t i = 0; i < uint64Count.size(); i++) {
+    uint64Count.at(i) += data.uint64Count.at(i);
+  }
+  // Averages: take weight into account
+  for (size_t i = 0; i < doubleWAvg.size(); i++) {
+    auto& lhs = doubleWAvg.at(i);
+    const auto& rhs = data.doubleWAvg.at(i);
+    auto weight = lhs.weight + rhs.weight;
+    auto avg = lhs.avg * static_cast<double>(lhs.weight) + rhs.avg * static_cast<double>(rhs.weight);
+    avg = weight == 0 ? 0 : avg / weight;
+    lhs.avg = avg;
+    lhs.weight = weight;
+  }
+  // Rcode Counters are simply added
+  for (size_t i = 0; i < auth.rcodeCounters.size(); i++) {
+    auth.rcodeCounters.at(i) += data.auth.rcodeCounters.at(i);
+  }
+  // Histograms counts are added by += operator on Histograms
+  for (size_t i = 0; i < histograms.size(); i++) {
+    histograms.at(i) += data.histograms.at(i);
+  }
+
+  return *this;
+}
+
+std::string Counters::toString() const
+{
+  std::ostringstream stream;
+
+  for (auto element : uint64Count) {
+    stream << element << ' ';
+  }
+  stream << std::endl;
+  for (auto element : doubleWAvg) {
+    stream << '(' << element.avg << ' ' << element.weight << ')';
+  }
+  stream << " RCodes: ";
+  for (auto element : auth.rcodeCounters) {
+    stream << element << ' ';
+  }
+  stream << "Histograms: ";
+  for (const auto& element : histograms) {
+    stream << element.getName() << ": NYI";
+  }
+  stream << std::endl;
+  return stream.str();
+}
+
+}
+
+// Compile with:
+// c++ -DTEST_TCOUNTER_TIMING -Wall -std=c++17 -O2 rec-tcounters.cc -pthread
+
+#if TEST_TCOUNTER_TIMING
+
+#include <iostream>
+#include <vector>
+#include <atomic>
+#include <thread>
+#include <ctime>
+
+rec::GlobalCounters g_counters;
+thread_local rec::TCounters t_counters(g_counters);
+
+std::atomic<uint64_t> atomicCounter;
+
+size_t iterations;
+
+void atomicThread()
+{
+  for (size_t i = 0; i < iterations; i++) {
+    ++atomicCounter;
+  }
+}
+
+void tcounterThread()
+{
+  for (size_t i = 0; i < iterations; i++) {
+    ++t_counters.at(rec::Counter::qcounter);
+    if (i % 100 == 0) {
+      t_counters.updateSnap();
+    }
+  }
+}
+
+int main(int argc, char* argv[])
+{
+  size_t threads = std::atoi(argv[1]);
+  iterations = std::atoi(argv[2]);
+
+  std::cout << "Starting " << threads << " threads doing " << iterations << " iterations using atomics" << std::endl;
+  std::vector<std::thread> thr;
+  thr.resize(threads);
+
+  timeval start;
+  gettimeofday(&start, nullptr);
+  for (size_t i = 0; i < threads; i++) {
+    thr[i] = std::thread(atomicThread);
+  }
+  for (size_t i = 0; i < threads; i++) {
+    thr[i].join();
+  }
+  timeval stop;
+  gettimeofday(&stop, nullptr);
+  timeval diff;
+  timersub(&stop, &start, &diff);
+  auto elapsed = (diff.tv_sec + diff.tv_usec / 1e6);
+  std::cout << "Sum is " << atomicCounter << " elapsed is " << elapsed << std::endl;
+
+  std::cout << "Now doing the same with tcounters" << std::endl;
+  gettimeofday(&start, nullptr);
+  for (size_t i = 0; i < threads; i++) {
+    thr[i] = std::thread(tcounterThread);
+  }
+  for (size_t i = 0; i < threads; i++) {
+    thr[i].join();
+  }
+  gettimeofday(&stop, nullptr);
+  timersub(&stop, &start, &diff);
+  elapsed = (diff.tv_sec + diff.tv_usec / 1e6);
+  std::cout << "Sum is " << g_counters.sum(rec::Counter::qcounter) << " elapsed is " << elapsed << std::endl;
+}
+
+#endif
diff --git a/pdns/recursordist/rec-tcounters.hh b/pdns/recursordist/rec-tcounters.hh

new file mode 100644 (file)

index 0000000..d315192
--- /dev/null
+++ b/pdns/recursordist/rec-tcounters.hh
@@ -0,0 +1,219 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#pragma once
+
+#include "tcounters.hh"
+
+#include <string>
+
+#include "histogram.hh"
+
+namespace rec
+{
+
+// Simple counters
+enum class Counter : uint8_t
+{
+  syncresqueries,
+  outgoingtimeouts,
+  outgoing4timeouts,
+  outgoing6timeouts,
+  throttledqueries,
+  dontqueries,
+  qnameminfallbacksuccess,
+  authzonequeries,
+  outqueries,
+  tcpoutqueries,
+  dotoutqueries,
+  unreachables,
+  servFails,
+  nxDomains,
+  noErrors,
+  qcounter,
+  ipv6qcounter,
+  tcpqcounter,
+  unauthorizedUDP, // when this is increased, qcounter isn't
+  unauthorizedTCP, // when this is increased, qcounter isn't
+  sourceDisallowedNotify, // when this is increased, qcounter is also
+  zoneDisallowedNotify, // when this is increased, qcounter is also
+  policyDrops,
+  tcpClientOverflow,
+  clientParseError,
+  serverParseError,
+  tooOldDrops,
+  truncatedDrops,
+  queryPipeFullDrops,
+  unexpectedCount,
+  caseMismatchCount,
+  spoofCount,
+  resourceLimits,
+  overCapacityDrops,
+  ipv6queries,
+  chainResends,
+  nsSetInvalidations,
+  ednsPingMatches,
+  ednsPingMismatches,
+  noPingOutQueries,
+  noEdnsOutQueries,
+  packetCacheHits,
+  noPacketError,
+  ignoredCount,
+  emptyQueriesCount,
+  dnssecQueries,
+  dnssecAuthenticDataQueries,
+  dnssecCheckDisabledQueries,
+  variableResponses,
+  maxMThreadStackUsage,
+  dnssecValidations, // should be the sum of all dnssecResult* stats
+  rebalancedQueries,
+  proxyProtocolInvalidCount,
+  nodLookupsDroppedOversize,
+  dns64prefixanswers,
+  maintenanceUsec,
+  maintenanceCalls,
+
+  numberOfCounters
+};
+
+// double avegares times, weighted according to how many packets they processed
+enum class DoubleWAvgCounter : uint8_t
+{
+  avgLatencyUsec,
+  avgLatencyOursUsec,
+  numberOfCounters
+};
+
+// An RCode histogram
+enum class RCode : uint8_t
+{
+  auth,
+  numberOfCounters
+};
+
+// A few other histograms
+enum class Histogram : uint8_t
+{
+  answers,
+  auth4Answers,
+  auth6Answers,
+  ourtime,
+  cumulativeAnswers,
+  cumulativeAuth4Answers,
+  cumulativeAuth6Answers,
+
+  numberOfCounters
+};
+
+struct Counters
+{
+  // An aray of simple counters
+  std::array<uint64_t, static_cast<size_t>(Counter::numberOfCounters)> uint64Count{};
+
+  struct WeightedAverage
+  {
+    double avg{};
+    uint64_t weight{};
+
+    void add(double value)
+    {
+      avg = value;
+      ++weight;
+    }
+
+    void addToRollingAvg(double value, uint64_t rollsize)
+    {
+      add((1.0 - 1.0 / static_cast<double>(rollsize)) * avg + value / static_cast<double>(rollsize));
+    }
+  };
+  // And an array of weighted averaged values
+  std::array<WeightedAverage, static_cast<size_t>(DoubleWAvgCounter::numberOfCounters)> doubleWAvg{};
+
+  struct RCodeCounters
+  {
+    RCodeCounters& operator+=(const RCodeCounters& rhs)
+    {
+      for (size_t i = 0; i < rcodeCounters.size(); i++) {
+        rcodeCounters.at(i) += rhs.rcodeCounters.at(i);
+      }
+      return *this;
+    }
+    static const size_t numberoOfRCodes = 16;
+    std::array<uint64_t, numberoOfRCodes> rcodeCounters;
+  };
+  // An RCodes histogram
+  RCodeCounters auth{};
+
+  std::array<pdns::Histogram, static_cast<size_t>(Histogram::numberOfCounters)> histograms = {
+    pdns::Histogram{"answers", {1000, 10000, 100000, 1000000}},
+    pdns::Histogram{"auth4answers", {1000, 10000, 100000, 1000000}},
+    pdns::Histogram{"auth6answers", {1000, 10000, 100000, 1000000}},
+    pdns::Histogram{"ourtime", {1000, 2000, 4000, 8000, 16000, 32000}},
+    pdns::Histogram{"cumul-clientanswers-", 10, 19},
+    pdns::Histogram{"cumul-authanswers-", 1000, 13},
+    pdns::Histogram{"cumul-authanswers-", 1000, 13}};
+
+  Counters()
+  {
+    for (auto& elem : uint64Count) {
+      elem = 0;
+    }
+    // doubleWAvg has a default ct that initializes
+    for (auto& elem : auth.rcodeCounters) {
+      elem = 0;
+    }
+  }
+
+  // Merge a set of counters into an existing set of counters. For simple counters, that will be additions
+  // for averages, we should take the weights into account. Histograms need to sum all individual counts.
+  Counters& merge(const Counters& data);
+
+  // The following accessors select the rightcounter type based on the index type
+  uint64_t& at(Counter index)
+  {
+    return uint64Count.at(static_cast<size_t>(index));
+  }
+
+  WeightedAverage& at(DoubleWAvgCounter index)
+  {
+    return doubleWAvg.at(static_cast<size_t>(index));
+  }
+
+  RCodeCounters& at(RCode index)
+  {
+    // We only have a single RCode indexed Histogram, so no need to select a specific one
+    return auth;
+  }
+
+  pdns::Histogram& at(Histogram index)
+  {
+    return histograms.at(static_cast<size_t>(index));
+  }
+
+  // Mainly for debugging purposes
+  [[nodiscard]] std::string toString() const;
+};
+
+// The application specific types, one for thread local, one for the aggregator
+using TCounters = pdns::TLocalCounters<Counters>;
+using GlobalCounters = pdns::GlobalCounters<Counters>;
+}
diff --git a/pdns/recursordist/rec-tcp.cc b/pdns/recursordist/rec-tcp.cc

index e1bc143b6e3d76dc2c70196e82e687e1d3a0e9ce..a0b1972e06b1a3ebd52d3ee2f08a2d7bdad471dd 100644 (file)
--- a/pdns/recursordist/rec-tcp.cc
+++ b/pdns/recursordist/rec-tcp.cc
@@ -229,7 +229,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
          SLOG(g_log << Logger::Error << "Unable to consume proxy protocol header in packet from TCP client " << conn->d_remote.toStringWithPort() << endl,
               g_slogtcpin->info(Logr::Error, "Unable to consume proxy protocol header in packet from TCP client", "remote", Logging::Loggable(conn->d_remote)));
        }
-      ++g_stats.proxyProtocolInvalidCount;
+      ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
        return;
      }
      else if (remaining < 0) {
@@ -250,7 +250,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
            SLOG(g_log << Logger::Error << "Unable to parse proxy protocol header in packet from TCP client " << conn->d_remote.toStringWithPort() << endl,
                 g_slogtcpin->info(Logr::Error, "Unable to parse proxy protocol header in packet from TCP client", "remote", Logging::Loggable(conn->d_remote)));
          }
-        ++g_stats.proxyProtocolInvalidCount;
+        ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
          return;
        }
        else if (static_cast<size_t>(used) > g_proxyProtocolMaximumSize) {
@@ -258,7 +258,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
            SLOG(g_log << Logger::Error << "Proxy protocol header in packet from TCP client " << conn->d_remote.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used << "), dropping" << endl,
                 g_slogtcpin->info(Logr::Error, "Proxy protocol header in packet from TCP client is larger than proxy-protocol-maximum-size", "remote", Logging::Loggable(conn->d_remote), "size", Logging::Loggable(used)));
          }
-        ++g_stats.proxyProtocolInvalidCount;
+        ++t_Counters.at(rec::Counter::proxyProtocolInvalidCount);
          return;
        }
  
@@ -278,7 +278,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                 g_slogtcpin->info(Logr::Error, "Dropping TCP query, address not matched by allow-from", "remote", Logging::Loggable(conn->d_remote)));
          }
  
-        ++g_stats.unauthorizedTCP;
+        ++t_Counters.at(rec::Counter::unauthorizedTCP);
          return;
        }
  
@@ -348,7 +348,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
          dc = std::make_unique<DNSComboWriter>(conn->data, g_now, t_pdl);
        }
        catch (const MOADNSException& mde) {
-        g_stats.clientParseError++;
+        t_Counters.at(rec::Counter::clientParseError)++;
          if (g_logCommonErrors) {
            SLOG(g_log << Logger::Error << "Unable to parse packet from TCP client " << conn->d_remote.toStringWithPort() << endl,
                 g_slogtcpin->info(Logr::Error, "Unable to parse packet from TCP client", "remte", Logging::Loggable(conn->d_remote)));
@@ -471,13 +471,13 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
              SLOG(g_log << Logger::Notice << RecThreadInfo::id() << " [" << MT->getTid() << "/" << MT->numProcesses() << "] DROPPED TCP question from " << dc->d_source.toStringWithPort() << (dc->d_source != dc->d_remote ? " (via " + dc->d_remote.toStringWithPort() + ")" : "") << " based on policy" << endl,
                   g_slogtcpin->info(Logr::Info, "Dropped TCP question based on policy", "remote", Logging::Loggable(conn->d_remote), "source", Logging::Loggable(dc->d_source)));
            }
-          g_stats.policyDrops++;
+          t_Counters.at(rec::Counter::policyDrops)++;
            return;
          }
        }
  
        if (dc->d_mdp.d_header.qr) {
-        g_stats.ignoredCount++;
+        t_Counters.at(rec::Counter::ignoredCount)++;
          if (g_logCommonErrors) {
            SLOG(g_log << Logger::Error << "Ignoring answer from TCP client " << dc->getRemote() << " on server socket!" << endl,
                 g_slogtcpin->info(Logr::Error, "Ignoring answer from TCP client on server socket", "remote", Logging::Loggable(dc->getRemote())));
@@ -485,7 +485,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
          return;
        }
        if (dc->d_mdp.d_header.opcode != Opcode::Query && dc->d_mdp.d_header.opcode != Opcode::Notify) {
-        g_stats.ignoredCount++;
+        t_Counters.at(rec::Counter::ignoredCount)++;
          if (g_logCommonErrors) {
            SLOG(g_log << Logger::Error << "Ignoring unsupported opcode " << Opcode::to_s(dc->d_mdp.d_header.opcode) << " from TCP client " << dc->getRemote() << " on server socket!" << endl,
                 g_slogtcpin->info(Logr::Error, "Ignoring unsupported opcode from TCP client", "remote", Logging::Loggable(dc->getRemote()), "opcode", Logging::Loggable(Opcode::to_s(dc->d_mdp.d_header.opcode))));
@@ -495,7 +495,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
          return;
        }
        else if (dh->qdcount == 0) {
-        g_stats.emptyQueriesCount++;
+        t_Counters.at(rec::Counter::emptyQueriesCount)++;
          if (g_logCommonErrors) {
            SLOG(g_log << Logger::Error << "Ignoring empty (qdcount == 0) query from " << dc->getRemote() << " on server socket!" << endl,
                 g_slogtcpin->info(Logr::Error, "Ignoring empty (qdcount == 0) query on server socket", "remote", Logging::Loggable(dc->getRemote())));
@@ -506,8 +506,9 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
        }
        else {
          // We have read a proper query
-        ++g_stats.qcounter;
-        ++g_stats.tcpqcounter;
+        //++t_Counters.at(rec::Counter::qcounter);
+        ++t_Counters.at(rec::Counter::qcounter);
+        ++t_Counters.at(rec::Counter::tcpqcounter);
  
          if (dc->d_mdp.d_header.opcode == Opcode::Notify) {
            if (!t_allowNotifyFrom || !t_allowNotifyFrom->match(dc->d_mappedSource)) {
@@ -516,7 +517,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                     g_slogtcpin->info(Logr::Error, "Dropping TCP NOTIFY, address not matched by allow-notify-from", "source", Logging::Loggable(dc->d_mappedSource)));
              }
  
-            g_stats.sourceDisallowedNotify++;
+            t_Counters.at(rec::Counter::sourceDisallowedNotify)++;
              return;
            }
  
@@ -526,7 +527,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                     g_slogtcpin->info(Logr::Error, "Dropping TCP NOTIFY,  zone not matched by allow-notify-for", "source", Logging::Loggable(dc->d_mappedSource), "zone", Logging::Loggable(qname)));
              }
  
-            g_stats.zoneDisallowedNotify++;
+            t_Counters.at(rec::Counter::zoneDisallowedNotify)++;
              return;
            }
          }
@@ -555,7 +556,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
              struct timeval now;
              Utility::gettimeofday(&now, nullptr);
              uint64_t spentUsec = uSec(now - start);
-            g_stats.cumulativeAnswers(spentUsec);
+            t_Counters.at(rec::Histogram::cumulativeAnswers)(spentUsec);
              dc->d_eventTrace.add(RecEventTrace::AnswerSent);
  
              if (t_protobufServers.servers && dc->d_logResponse && !(luaconfsLocal->protobufExportConfig.taggedOnly && pbData && !pbData->d_tagged)) {
@@ -571,6 +572,7 @@ static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
                     g_slogtcpin->info(Logr::Info, dc->d_eventTrace.toString())); // More fancy?
              }
              tcpGuard.keep();
+            t_Counters.updateSnap(g_regressionTestMode);
              return;
            } // cache hit
          } // query opcode
@@ -618,7 +620,7 @@ void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t&)
    int newsock = accept(fd, (struct sockaddr*)&addr, &addrlen);
    if (newsock >= 0) {
      if (MT->numProcesses() > g_maxMThreads) {
-      g_stats.overCapacityDrops++;
+      t_Counters.at(rec::Counter::overCapacityDrops)++;
        try {
          closesocket(newsock);
        }
@@ -646,7 +648,7 @@ void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t&)
          SLOG(g_log << Logger::Error << "[" << MT->getTid() << "] dropping TCP query from " << mappedSource.toString() << ", address neither matched by allow-from nor proxy-protocol-from" << endl,
               g_slogtcpin->info(Logr::Error, "dropping TCP query address neither matched by allow-from nor proxy-protocol-from", "source", Logging::Loggable(mappedSource)));
  
-      g_stats.unauthorizedTCP++;
+      t_Counters.at(rec::Counter::unauthorizedTCP)++;
        try {
          closesocket(newsock);
        }
@@ -658,7 +660,7 @@ void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t&)
      }
  
      if (g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
-      g_stats.tcpClientOverflow++;
+      t_Counters.at(rec::Counter::tcpClientOverflow)++;
        try {
          closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
        }
diff --git a/pdns/recursordist/tcounters.hh b/pdns/recursordist/tcounters.hh

new file mode 120000 (symlink)

index 0000000..7bf637d
--- /dev/null
+++ b/pdns/recursordist/tcounters.hh
@@ -0,0 +1 @@
+../tcounters.hh
+\ No newline at end of file
diff --git a/pdns/recursordist/test-rec-tcounters_cc.cc b/pdns/recursordist/test-rec-tcounters_cc.cc

new file mode 100644 (file)

index 0000000..a86cab0
--- /dev/null
+++ b/pdns/recursordist/test-rec-tcounters_cc.cc
@@ -0,0 +1,149 @@
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_NO_MAIN
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <boost/test/unit_test.hpp>
+
+#include <unistd.h>
+#include <thread>
+#include "rec-tcounters.hh"
+
+static rec::GlobalCounters global;
+static thread_local rec::TCounters tlocal(global);
+
+BOOST_AUTO_TEST_SUITE(test_rec_tcounters_cc)
+
+BOOST_AUTO_TEST_CASE(destruct)
+{
+  global.reset();
+
+  const size_t count = 100000;
+  std::thread thread1([] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+    }
+  });
+  std::thread thread2([] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::nxDomains);
+    }
+  });
+  thread1.join();
+  thread2.join();
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::servFails), count);
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::nxDomains), count);
+}
+
+BOOST_AUTO_TEST_CASE(update_fast)
+{
+  global.reset();
+
+  std::atomic<uint64_t> done{};
+
+  const size_t count = 10000000;
+  std::thread thread1([&done] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(1.1);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+    }
+    done++;
+  });
+  std::thread thread2([&done] {
+    for (size_t i = 0; i < count / 2; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(2.2);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+    }
+    done++;
+  });
+  std::thread thread3([&done] {
+    while (done < 2) {
+      auto counts = global.aggregatedSnap();
+      BOOST_CHECK_EQUAL(counts.uint64Count[0], counts.uint64Count[1]);
+      auto avg = counts.at(rec::DoubleWAvgCounter::avgLatencyUsec).avg;
+      BOOST_CHECK(avg == 0.0 || (avg >= 1.1 && avg <= 2.2));
+    }
+  });
+  thread1.join();
+  thread2.join();
+  thread3.join();
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::servFails), count + count / 2);
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::nxDomains), count + count / 2);
+  auto avg = global.avg(rec::DoubleWAvgCounter::avgLatencyUsec);
+  BOOST_CHECK(avg >= 1.1 && avg <= 2.2);
+}
+
+BOOST_AUTO_TEST_CASE(update_with_sleep)
+{
+
+  global.reset();
+
+  std::atomic<int> done{};
+
+  const size_t count = 1000000;
+  std::thread thread1([&done] {
+    for (size_t i = 0; i < count; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(1.1);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+      struct timespec interval
+      {
+        0, random() % 5000
+      };
+      nanosleep(&interval, nullptr);
+    }
+    done++;
+  });
+  std::thread thread2([&done] {
+    for (size_t i = 0; i < count / 2; i++) {
+      ++tlocal.at(rec::Counter::servFails);
+      ++tlocal.at(rec::Counter::nxDomains);
+      tlocal.at(rec::DoubleWAvgCounter::avgLatencyUsec).add(2.2);
+      if (random() % 10000 == 0) {
+        tlocal.updateSnap();
+      }
+      struct timespec interval
+      {
+        0, random() % 999
+      };
+      nanosleep(&interval, nullptr);
+    }
+    done++;
+  });
+  std::thread thread3([&done] {
+    while (done < 2) {
+      auto counts = global.aggregatedSnap();
+      BOOST_CHECK_EQUAL(counts.uint64Count[0], counts.uint64Count[1]);
+      auto avg = counts.at(rec::DoubleWAvgCounter::avgLatencyUsec).avg;
+      // std::cerr << avg << std::endl;
+      BOOST_CHECK(avg == 0.0 || (avg >= 1.1 && avg <= 2.2));
+      struct timespec interval
+      {
+        0, random() % 10000
+      };
+      nanosleep(&interval, nullptr);
+    }
+  });
+  thread1.join();
+  thread2.join();
+  thread3.join();
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::servFails), count + count / 2);
+  BOOST_CHECK_EQUAL(global.sum(rec::Counter::nxDomains), count + count / 2);
+  auto avg = global.avg(rec::DoubleWAvgCounter::avgLatencyUsec);
+  BOOST_CHECK(avg >= 1.1 && avg <= 2.2);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/pdns/recursordist/test-syncres_cc.cc b/pdns/recursordist/test-syncres_cc.cc

index ae2c062132927231100d4783bdd5deff25e291fc..4d47fcb0fcf9b31abd7e12de0b787a62cb79f948 100644 (file)
--- a/pdns/recursordist/test-syncres_cc.cc
+++ b/pdns/recursordist/test-syncres_cc.cc
@@ -29,10 +29,6 @@ ArgvMap& arg()
    return theArg;
  }
  
-void primeRootNSZones(DNSSECMode, unsigned int)
-{
-}
-
  BaseLua4::~BaseLua4()
  {
  }
diff --git a/pdns/syncres.cc b/pdns/syncres.cc

index 5fe1dbf69bba17c6bd47e9093deaf56c67cd04df..039ae2980149ac6f08ec588b9e7a6827b67481f7 100644 (file)
--- a/pdns/syncres.cc
+++ b/pdns/syncres.cc
@@ -38,6 +38,9 @@
  #include "validate-recursor.hh"
  #include "rec-taskqueue.hh"
  
+rec::GlobalCounters g_Counters;
+thread_local rec::TCounters t_Counters(g_Counters);
+
  template<class T>
  class fails_t : public boost::noncopyable
  {
@@ -427,18 +430,6 @@ unsigned int SyncRes::s_serverdownthrottletime;
  unsigned int SyncRes::s_nonresolvingnsmaxfails;
  unsigned int SyncRes::s_nonresolvingnsthrottletime;
  unsigned int SyncRes::s_ecscachelimitttl;
-pdns::stat_t SyncRes::s_authzonequeries;
-pdns::stat_t SyncRes::s_queries;
-pdns::stat_t SyncRes::s_outgoingtimeouts;
-pdns::stat_t SyncRes::s_outgoing4timeouts;
-pdns::stat_t SyncRes::s_outgoing6timeouts;
-pdns::stat_t SyncRes::s_outqueries;
-pdns::stat_t SyncRes::s_tcpoutqueries;
-pdns::stat_t SyncRes::s_dotoutqueries;
-pdns::stat_t SyncRes::s_throttledqueries;
-pdns::stat_t SyncRes::s_dontqueries;
-pdns::stat_t SyncRes::s_qnameminfallbacksuccess;
-pdns::stat_t SyncRes::s_unreachables;
  pdns::stat_t SyncRes::s_ecsqueries;
  pdns::stat_t SyncRes::s_ecsresponses;
  std::map<uint8_t, pdns::stat_t> SyncRes::s_ecsResponsesBySubnetSize4;
@@ -486,11 +477,11 @@ static inline std::string fmtfloat(const char* fmt, double f)
  static inline void accountAuthLatency(uint64_t usec, int family)
  {
    if (family == AF_INET) {
-    g_stats.auth4Answers(usec);
-    g_stats.cumulativeAuth4Answers(usec);
+    t_Counters.at(rec::Histogram::auth4Answers)(usec);
+    t_Counters.at(rec::Histogram::cumulativeAuth4Answers)(usec);
    } else  {
-    g_stats.auth6Answers(usec);
-    g_stats.cumulativeAuth6Answers(usec);
+    t_Counters.at(rec::Histogram::auth6Answers)(usec);
+    t_Counters.at(rec::Histogram::cumulativeAuth6Answers)(usec);
    }
  }
  
@@ -694,7 +685,7 @@ int SyncRes::beginResolve(const DNSName &qname, const QType qtype, QClass qclass
  {
    d_eventTrace.add(RecEventTrace::SyncRes);
    vState state = vState::Indeterminate;
-  s_queries++;
+  t_Counters.at(rec::Counter::syncresqueries)++;
    d_wasVariable=false;
    d_wasOutOfBand=false;
    d_cutStates.clear();
@@ -726,7 +717,7 @@ int SyncRes::beginResolve(const DNSName &qname, const QType qtype, QClass qclass
  
    if (shouldValidate()) {
      if (d_queryValidationState != vState::Indeterminate) {
-      g_stats.dnssecValidations++;
+      t_Counters.at(rec::Counter::dnssecValidations)++;
      }
      auto xdnssec = g_xdnssec.getLocal();
      if (xdnssec->check(qname)) {
@@ -993,7 +984,7 @@ int SyncRes::AuthDomain::getRecords(const DNSName& qname, const QType qtype, std
  bool SyncRes::doOOBResolve(const AuthDomain& domain, const DNSName &qname, const QType qtype, vector<DNSRecord>&ret, int& res)
  {
    d_authzonequeries++;
-  s_authzonequeries++;
+  t_Counters.at(rec::Counter::authzonequeries)++;
  
    res = domain.getRecords(qname, qtype, ret);
    return true;
@@ -1516,7 +1507,7 @@ LWResult::Result SyncRes::asyncresolveWrapper(const ComboAddress& ip, bool ednsM
    for (int tries = 0; tries < 2; ++tries) {
  
      if (mode == EDNSStatus::NOEDNS) {
-      g_stats.noEdnsOutQueries++;
+      t_Counters.at(rec::Counter::noEdnsOutQueries)++;
        EDNSLevel = 0; // level != mode
      }
      else if (ednsMANDATORY || mode != EDNSStatus::NOEDNS) {
@@ -1772,7 +1763,7 @@ int SyncRes::doResolve(const DNSName &qname, const QType qtype, vector<DNSRecord
          res = doResolveNoQNameMinimization(qname, qtype, ret, depth/2, beenthere, state);
  
          if(res == RCode::NoError) {
-          s_qnameminfallbacksuccess++;
+          t_Counters.at(rec::Counter::qnameminfallbacksuccess)++;
          }
  
          QLOG("Step5 End resolve: " << RCode::to_s(res) << "/" << ret.size());
@@ -1853,7 +1844,7 @@ int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType qtyp
  
                d_totUsec += lwr.d_usec;
                accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
-              ++g_stats.authRCode.at(lwr.d_rcode);
+              ++t_Counters.at(rec::RCode::auth).rcodeCounters.at(static_cast<uint8_t>(lwr.d_rcode));
                if (fromCache)
                  *fromCache = true;
  
@@ -3242,7 +3233,7 @@ void SyncRes::handlePolicyHit(const std::string& prefix, const DNSName& qname, c
        return;
  
    case DNSFilterEngine::PolicyKind::Drop:
-    ++g_stats.policyDrops;
+    ++t_Counters.at(rec::Counter::policyDrops);
      throw ImmediateQueryDropException();
  
    case DNSFilterEngine::PolicyKind::NXDOMAIN:
@@ -3417,12 +3408,14 @@ bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress&
  {
    if (isThrottled(d_now.tv_sec, remoteIP)) {
      LOG(prefix<<qname<<": server throttled "<<endl);
-    s_throttledqueries++; d_throttledqueries++;
+    t_Counters.at(rec::Counter::throttledqueries)++;
+    d_throttledqueries++;
      return true;
    }
    else if (isThrottled(d_now.tv_sec, remoteIP, qname, qtype)) {
      LOG(prefix<<qname<<": query throttled "<<remoteIP.toString()<<", "<<qname<<"; "<<qtype<<endl);
-    s_throttledqueries++; d_throttledqueries++;
+    t_Counters.at(rec::Counter::throttledqueries)++;
+    d_throttledqueries++;
      return true;
    }
    else if(!pierceDontQuery && s_dontQuery && s_dontQuery->match(&remoteIP)) {
@@ -3432,14 +3425,14 @@ bool SyncRes::throttledOrBlocked(const std::string& prefix, const ComboAddress&
      auto it = getBestAuthZone(&forwardCandidate);
      if (it == t_sstorage.domainmap->end()) {
        LOG(prefix<<qname<<": not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
-      s_dontqueries++;
+      t_Counters.at(rec::Counter::dontqueries)++;
        return true;
      } else {
        // The name (from the cache) is forwarded, but is it forwarded to an IP in known forwarders?
        const auto& ips = it->second.d_servers;
        if (std::find(ips.cbegin(), ips.cend(), remoteIP) == ips.cend()) {
          LOG(prefix<<qname<<": not sending query to " << remoteIP.toString() << ", blocked by 'dont-query' setting" << endl);
-        s_dontqueries++;
+        t_Counters.at(rec::Counter::dontqueries)++;
          return true;
        } else {
          LOG(prefix<<qname<<": sending query to " << remoteIP.toString() << ", blocked by 'dont-query' but a forwarding/auth case" << endl);
@@ -5148,7 +5141,7 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
  {
    bool chained = false;
    LWResult::Result resolveret = LWResult::Result::Success;
-  s_outqueries++;
+  t_Counters.at(rec::Counter::outqueries)++;
    d_outqueries++;
    checkMaxQperQ(qname);
  
@@ -5159,11 +5152,11 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
    if(doTCP) {
      if (doDoT) {
        LOG(prefix<<qname<<": using DoT with "<< remoteIP.toStringWithPort() <<endl);
-      s_dotoutqueries++;
+      t_Counters.at(rec::Counter::dotoutqueries)++;
        d_dotoutqueries++;
      } else {
        LOG(prefix<<qname<<": using TCP with "<< remoteIP.toStringWithPort() <<endl);
-      s_tcpoutqueries++;
+      t_Counters.at(rec::Counter::tcpoutqueries)++;
        d_tcpoutqueries++;
      }
    }
@@ -5201,7 +5194,7 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
  
    d_totUsec += lwr.d_usec;
    accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
-  ++g_stats.authRCode.at(lwr.d_rcode);
+  ++t_Counters.at(rec::RCode::auth).rcodeCounters.at(static_cast<uint8_t>(lwr.d_rcode));
  
    if (!dontThrottle) {
      auto dontThrottleNames = g_dontThrottleNames.getLocal();
@@ -5216,12 +5209,12 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
  
        LOG(prefix<<qname<<": timeout resolving after "<<lwr.d_usec/1000.0<<"msec "<< (doTCP ? "over TCP" : "")<<endl);
        d_timeouts++;
-      s_outgoingtimeouts++;
+      t_Counters.at(rec::Counter::outgoingtimeouts)++;
  
        if(remoteIP.sin4.sin_family == AF_INET)
-        s_outgoing4timeouts++;
+        t_Counters.at(rec::Counter::outgoing4timeouts)++;
        else
-        s_outgoing6timeouts++;
+        t_Counters.at(rec::Counter::outgoing6timeouts)++;
  
        if(t_timeouts)
          t_timeouts->push_back(remoteIP);
@@ -5229,14 +5222,14 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname,
      else if (resolveret == LWResult::Result::OSLimitError) {
        /* OS resource limit reached */
        LOG(prefix<<qname<<": hit a local resource limit resolving"<< (doTCP ? " over TCP" : "")<<", probable error: "<<stringerror()<<endl);
-      g_stats.resourceLimits++;
+      t_Counters.at(rec::Counter::resourceLimits)++;
      }
      else if (resolveret == LWResult::Result::Spoofed) {
        spoofed = true;
      }
      else {
        /* LWResult::Result::PermanentError */
-      s_unreachables++;
+      t_Counters.at(rec::Counter::unreachables)++;
        d_unreachables++;
        // XXX questionable use of errno
        LOG(prefix<<qname<<": error resolving from "<<remoteIP.toString()<< (doTCP ? " over TCP" : "") <<", possible error: "<<stringerror()<< endl);
@@ -5570,7 +5563,7 @@ int SyncRes::doResolveAt(NsSet &nameservers, DNSName auth, bool flawedNSSet, con
            LOG(prefix<<qname<<": Ageing nameservers for level '"<<auth<<"', next query might succeed"<<endl);
  
            if(g_recCache->doAgeCache(d_now.tv_sec, auth, QType::NS, 10))
-            g_stats.nsSetInvalidations++;
+            t_Counters.at(rec::Counter::nsSetInvalidations)++;
          }
          return -1;
        }
diff --git a/pdns/syncres.hh b/pdns/syncres.hh

index 904089aa6dbac68f2241012661f768ab1d035f74..66b2508d9577bee9e4ac21b0266f1638bdc937de 100644 (file)
--- a/pdns/syncres.hh
+++ b/pdns/syncres.hh
@@ -53,6 +53,7 @@
  #include "tcpiohandler.hh"
  #include "rec-eventtrace.hh"
  #include "logr.hh"
+#include "rec-tcounters.hh"
  
  #ifdef HAVE_CONFIG_H
  #include "config.h"
@@ -458,18 +459,6 @@ public:
  
    static thread_local ThreadLocalStorage t_sstorage;
  
-  static pdns::stat_t s_queries;
-  static pdns::stat_t s_outgoingtimeouts;
-  static pdns::stat_t s_outgoing4timeouts;
-  static pdns::stat_t s_outgoing6timeouts;
-  static pdns::stat_t s_throttledqueries;
-  static pdns::stat_t s_dontqueries;
-  static pdns::stat_t s_qnameminfallbacksuccess;
-  static pdns::stat_t s_authzonequeries;
-  static pdns::stat_t s_outqueries;
-  static pdns::stat_t s_tcpoutqueries;
-  static pdns::stat_t s_dotoutqueries;
-  static pdns::stat_t s_unreachables;
    static pdns::stat_t s_ecsqueries;
    static pdns::stat_t s_ecsresponses;
    static std::map<uint8_t, pdns::stat_t> s_ecsResponsesBySubnetSize4;
@@ -760,79 +749,17 @@ struct PacketIDBirthdayCompare
  };
  extern std::unique_ptr<MemRecursorCache> g_recCache;
  
+extern rec::GlobalCounters g_Counters;
+extern thread_local rec::TCounters t_Counters;
+
  struct RecursorStats
  {
-  pdns::stat_t servFails;
-  pdns::stat_t nxDomains;
-  pdns::stat_t noErrors;
-  pdns::AtomicHistogram answers;
-  pdns::AtomicHistogram auth4Answers;
-  pdns::AtomicHistogram auth6Answers;
-  pdns::AtomicHistogram ourtime;
-  pdns::AtomicHistogram cumulativeAnswers;
-  pdns::AtomicHistogram cumulativeAuth4Answers;
-  pdns::AtomicHistogram cumulativeAuth6Answers;
-  pdns::stat_t_trait<double> avgLatencyUsec;
-  pdns::stat_t_trait<double> avgLatencyOursUsec;
-  pdns::stat_t qcounter;     // not increased for unauth packets
-  pdns::stat_t ipv6qcounter;
-  pdns::stat_t tcpqcounter;
-  pdns::stat_t unauthorizedUDP;  // when this is increased, qcounter isn't
-  pdns::stat_t unauthorizedTCP;  // when this is increased, qcounter isn't
-  pdns::stat_t sourceDisallowedNotify;  // when this is increased, qcounter is also
-  pdns::stat_t zoneDisallowedNotify;  // when this is increased, qcounter is also
-  pdns::stat_t policyDrops;
-  pdns::stat_t tcpClientOverflow;
-  pdns::stat_t clientParseError;
-  pdns::stat_t serverParseError;
-  pdns::stat_t tooOldDrops;
-  pdns::stat_t truncatedDrops;
-  pdns::stat_t queryPipeFullDrops;
-  pdns::stat_t unexpectedCount;
-  pdns::stat_t caseMismatchCount;
-  pdns::stat_t spoofCount;
-  pdns::stat_t resourceLimits;
-  pdns::stat_t overCapacityDrops;
-  pdns::stat_t ipv6queries;
-  pdns::stat_t chainResends;
-  pdns::stat_t nsSetInvalidations;
-  pdns::stat_t ednsPingMatches;
-  pdns::stat_t ednsPingMismatches;
-  pdns::stat_t noPingOutQueries, noEdnsOutQueries;
-  pdns::stat_t packetCacheHits;
-  pdns::stat_t noPacketError;
-  pdns::stat_t ignoredCount;
-  pdns::stat_t emptyQueriesCount;
    time_t startupTime{time(nullptr)};
-  pdns::stat_t dnssecQueries;
-  pdns::stat_t dnssecAuthenticDataQueries;
-  pdns::stat_t dnssecCheckDisabledQueries;
-  pdns::stat_t variableResponses;
-  pdns::stat_t maxMThreadStackUsage;
-  pdns::stat_t dnssecValidations; // should be the sum of all dnssecResult* stats
+  // XXX Convert counter below to be part of rec::Counters
    std::map<vState, pdns::stat_t > dnssecResults;
    std::map<vState, pdns::stat_t > xdnssecResults;
    std::map<DNSFilterEngine::PolicyKind, pdns::stat_t > policyResults;
    LockGuarded<std::unordered_map<std::string, pdns::stat_t>> policyHits;
-  pdns::stat_t rebalancedQueries{0};
-  pdns::stat_t proxyProtocolInvalidCount{0};
-  pdns::stat_t nodLookupsDroppedOversize{0};
-  pdns::stat_t dns64prefixanswers{0};
-  pdns::stat_t maintenanceUsec{0};
-  pdns::stat_t maintenanceCalls{0};
-  std::array<pdns::stat_t, 16> authRCode;
-
-  RecursorStats() :
-    answers("answers", { 1000, 10000, 100000, 1000000 }),
-    auth4Answers("auth4answers", { 1000, 10000, 100000, 1000000 }),
-    auth6Answers("auth6answers", { 1000, 10000, 100000, 1000000 }),
-    ourtime("ourtime", { 1000, 2000, 4000, 8000, 16000, 32000 }),
-    cumulativeAnswers("cumul-clientanswers-", 10, 19),
-    // These two will be merged when outputting
-    cumulativeAuth4Answers("cumul-authanswers-", 1000, 13),
-    cumulativeAuth6Answers("cumul-authanswers-", 1000, 13)
-  {
-  }
  };
  
  //! represents a running TCP/IP client session
diff --git a/pdns/tcounters.hh b/pdns/tcounters.hh

new file mode 100644 (file)

index 0000000..c6956c6
--- /dev/null
+++ b/pdns/tcounters.hh
@@ -0,0 +1,260 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#pragma once
+
+#include <sys/time.h>
+#include <array>
+#include <set>
+#include <unistd.h>
+
+#include "lock.hh"
+
+namespace pdns
+{
+// We keep three sets of related counters:
+//
+// 1. The current counters (thread-local, updated individually by thread code very often)
+// 2. The snapshot counters (thread local, updated by thread code in one single mutex protected copy)
+// 3. The history counters (global) to keep track of the counters of deleted threads
+
+// We have two main clasess: one that holds the thread local counters
+// (both current and snapshot ones) and one that aggregates the
+// values for all threads adn keeps the history counters.
+
+// The thread local current counters are the ones updated by
+// performance critical code.  Every once in a while, all values in the
+// current counters are copied to the snapshot thread local copies in
+// a thread safe way.
+
+// The snapshot counters are aggregated by the GlobalCounters
+// class, as these can be accessed safely from multiple threads.
+
+// Make sure to call the thread local tlocal.updatesAtomics() once
+// in a while. This will fill the snapshot values for that thread if some
+// time has passed since the last snap update.
+
+// To fet aggregate value call globals.sum(counter1) or
+// globals.avg(counter2), or any aggreggation function.  If multiple
+// counters need to be collected in a consistent way:
+// auto data = globals.aggregatedSnap();
+//
+// Note that the aggregate values can mix somewhat older thread-local
+// with newer thread-local info from another thread. So it is possible
+// to see the following:
+//
+// If thread T1 increments "received" and then passes the packet to
+// thread T2 that increments "processed", it may happen that the value
+// of "processed" observed by sum() is higher than "received", as T1
+// might not have called updateSnap() yet while T2 did.  To avoid this
+// inconsistentcy, be careful to update related counters in a single
+// thread only.
+
+// For an example of the use of these templates, see rec-tcounters.hh
+
+template <typename Counters>
+class TLocalCounters;
+
+template <typename Counters>
+class GlobalCounters
+{
+public:
+  // Register a thread local set of values
+  void subscribe(TLocalCounters<Counters>* ptr)
+  {
+    auto lock = d_guarded.lock();
+    lock->d_instances.emplace(ptr);
+  }
+
+  // Unregister, typically done when a thread exits
+  void unsubscribe(TLocalCounters<Counters>* ptr, const Counters& data)
+  {
+    auto lock = d_guarded.lock();
+    lock->d_instances.erase(ptr);
+    lock->d_history.merge(data);
+  }
+
+  // Two ways of computing aggregated values for a specific counter: simple additions of all thread data, or taking weighted averages into account
+  template <typename Enum>
+  auto sum(Enum index);
+  template <typename Enum>
+  auto avg(Enum index);
+  template <typename Enum>
+  auto max(Enum index);
+
+  // Aggregate all counter data for all threads
+  Counters aggregatedSnap();
+
+  // Reset history
+  void reset()
+  {
+    auto lock = d_guarded.lock();
+    lock->d_history = Counters();
+  }
+
+private:
+  struct Guarded
+  {
+    // We have x instances, normally one per thread
+    std::set<TLocalCounters<Counters>*> d_instances;
+    // If an instance gets deleted because its thread is cleaned up, the values
+    // are accumulated in d_history
+    Counters d_history;
+  };
+  LockGuarded<Guarded> d_guarded;
+};
+
+template <typename Counters>
+class TLocalCounters
+{
+public:
+  static const suseconds_t defaultSnapUpdatePeriodus = 100000;
+  TLocalCounters(GlobalCounters<Counters>& collector, timeval interval = timeval{0, defaultSnapUpdatePeriodus}) :
+    d_collector(collector), d_interval(interval)
+  {
+    collector.subscribe(this);
+  }
+
+  ~TLocalCounters()
+  {
+    d_collector.unsubscribe(this, d_current);
+  }
+
+  TLocalCounters(const TLocalCounters&) = delete;
+  TLocalCounters(TLocalCounters&&) = delete;
+  TLocalCounters& operator=(const TLocalCounters&) = delete;
+  TLocalCounters& operator=(TLocalCounters&&) = delete;
+
+  template <typename Enum>
+  auto& at(Enum index)
+  {
+    return d_current.at(index);
+  }
+
+  template <typename Enum>
+  auto snapAt(Enum index)
+  {
+    return d_snapshot.lock()->at(index);
+  }
+
+  [[nodiscard]] Counters getSnap()
+  {
+    return *(d_snapshot.lock());
+  }
+
+  bool updateSnap(const timeval& tv_now, bool force = false)
+  {
+    timeval tv_diff{};
+
+    if (!force) {
+      timersub(&tv_now, &d_last, &tv_diff);
+    }
+    if (force || timercmp(&tv_diff, &d_interval, >=)) {
+      // It's a copy
+      *(d_snapshot.lock()) = d_current;
+      d_last = tv_now;
+      return true;
+    }
+    return false;
+  }
+
+  bool updateSnap(bool force = false)
+  {
+    timeval tv_now{};
+
+    if (!force) {
+      gettimeofday(&tv_now, nullptr);
+    }
+    return updateSnap(tv_now, force);
+  }
+
+private:
+  GlobalCounters<Counters>& d_collector;
+  Counters d_current;
+  LockGuarded<Counters> d_snapshot;
+  timeval d_last{0, 0};
+  const timeval d_interval;
+};
+
+// Sum for a specific index
+// In the future we might the move the specifics of computing an aggregated value to the
+// app specific Counters class
+template <typename Counters>
+template <typename Enum>
+auto GlobalCounters<Counters>::sum(Enum index)
+{
+  auto lock = d_guarded.lock();
+  auto sum = lock->d_history.at(index);
+  for (const auto& instance : lock->d_instances) {
+    sum += instance->snapAt(index);
+  }
+  return sum;
+}
+
+// Average for a specific index
+// In the future we might the move the specifics of computing an aggregated value to the
+// app specific Counters class
+template <typename Counters>
+template <typename Enum>
+auto GlobalCounters<Counters>::avg(Enum index)
+{
+  auto lock = d_guarded.lock();
+  auto wavg = lock->d_history.at(index);
+  auto sum = wavg.avg * wavg.weight;
+  auto count = wavg.weight;
+  for (const auto& instance : lock->d_instances) {
+    auto val = instance->snapAt(index);
+    count += val.weight;
+    sum += val.avg * val.weight;
+  }
+  return count > 0 ? sum / count : 0;
+}
+
+// Max for a specific  index
+// In the future we might the move the specifics of computing an aggregated value to the
+// app specific Counters class
+template <typename Counters>
+template <typename Enum>
+auto GlobalCounters<Counters>::max(Enum index)
+{
+  auto lock = d_guarded.lock();
+  uint64_t max = 0; // ignore history
+  for (const auto& instance : lock->d_instances) {
+    max = std::max(instance->snapAt(index), max);
+  }
+  return max;
+}
+
+// Get a consistent snap of *all* aggregated values
+template <typename Counters>
+Counters GlobalCounters<Counters>::aggregatedSnap()
+{
+  auto lock = d_guarded.lock();
+  Counters ret = lock->d_history;
+  for (const auto& instance : lock->d_instances) {
+    auto snap = instance->getSnap();
+    ret.merge(snap);
+  }
+  return ret;
+}
+
+}
diff --git a/regression-tests.recursor-dnssec/recursortests.py b/regression-tests.recursor-dnssec/recursortests.py

index 2576b0955b5802a2214ab22928b6c365983ad687..165208b7926aed4ab3b0d9073a3814acb8640975 100644 (file)
--- a/regression-tests.recursor-dnssec/recursortests.py
+++ b/regression-tests.recursor-dnssec/recursortests.py
@@ -39,7 +39,6 @@ class RecursorTest(AssertEqualDNSMessageMixin, unittest.TestCase):
  
      _confdir = 'recursor'
  
-    _recursorStartupDelay = 2.0
      _recursorPort = 5300
  
      _recursor = None
@@ -649,11 +648,11 @@ distributor-threads={threads}""".format(confdir=confdir,
              raise AssertionError('%s failed (%d)' % (recursorcmd, cls._recursor.returncode))
  
      @classmethod
-    def wipeRecursorCache(cls, confdir):
+    def wipeRecursorCache(cls, confdir, name='.$'):
          rec_controlCmd = [os.environ['RECCONTROL'],
                            '--config-dir=%s' % confdir,
                            'wipe-cache',
-                          '.$']
+                          name]
          try:
              subprocess.check_output(rec_controlCmd, stderr=subprocess.STDOUT)
          except subprocess.CalledProcessError as e:
diff --git a/regression-tests.recursor-dnssec/test_AggressiveNSECCache.py b/regression-tests.recursor-dnssec/test_AggressiveNSECCache.py

index a4640176c69df64241f9d6321345128d79dfed6c..808bbdd3d8473e367b6a4349924b2209d29c9313 100644 (file)
--- a/regression-tests.recursor-dnssec/test_AggressiveNSECCache.py
+++ b/regression-tests.recursor-dnssec/test_AggressiveNSECCache.py
@@ -11,6 +11,7 @@ class AggressiveNSECCacheBase(RecursorTest):
      _wsTimeout = 10
      _wsPassword = 'secretpassword'
      _apiKey = 'secretapikey'
+    #_recursorStartupDelay = 4.0
      _config_template = """
      dnssec=validate
      aggressive-nsec-cache-size=10000
@@ -19,12 +20,14 @@ class AggressiveNSECCacheBase(RecursorTest):
      webserver-address=127.0.0.1
      webserver-password=%s
      api-key=%s
+    devonly-regression-test-mode
      """ % (_wsPort, _wsPassword, _apiKey)
  
      @classmethod
      def wipe(cls):
          confdir = os.path.join('configs', cls._confdir)
-        cls.wipeRecursorCache(confdir)
+        # Only wipe examples, as wiping the root triggers root NS refreshes
+        cls.wipeRecursorCache(confdir, "example$")
  
      def getMetric(self, name):
          headers = {'x-api-key': self._apiKey}
@@ -42,8 +45,8 @@ class AggressiveNSECCacheBase(RecursorTest):
          self.assertTrue(False)
  
      def testNoData(self):
-
          self.wipe()
+
          # first we query a non-existent type, to get the NSEC in our cache
          entries = self.getMetric('aggressive-nsec-cache-entries')
          res = self.sendQuery('host1.secure.example.', 'TXT')
@@ -72,8 +75,8 @@ class AggressiveNSECCacheNSEC(AggressiveNSECCacheBase):
      # we can't use the same tests for NSEC and NSEC3 because the hashed NSEC3s
      # do not deny the same names than the non-hashed NSECs do
      def testNXD(self):
-
          self.wipe()
+
          # first we query a non-existent name, to get the needed NSECs (name + widcard) in our cache
          entries = self.getMetric('aggressive-nsec-cache-entries')
          hits = self.getMetric('aggressive-nsec-cache-nsec-hits')
@@ -100,8 +103,8 @@ class AggressiveNSECCacheNSEC(AggressiveNSECCacheBase):
          self.assertGreater(self.getMetric('aggressive-nsec-cache-nsec-hits'), hits)
  
      def testWildcard(self):
-
          self.wipe()
+
          # first we query a non-existent name, but for which a wildcard matches,
          # to get the NSEC in our cache
          res = self.sendQuery('test1.wildcard.secure.example.', 'A')
@@ -231,6 +234,7 @@ class AggressiveNSECCacheNSEC3(AggressiveNSECCacheBase):
              raise AssertionError('%s failed (%d): %s' % (pdnsutilCmd, e.returncode, e.output))
  
      def testNXD(self):
+        self.wipe()
  
          # first we query a non-existent name, to get the needed NSEC3s in our cache
          res = self.sendQuery('host2.secure.example.', 'TXT')
@@ -250,6 +254,7 @@ class AggressiveNSECCacheNSEC3(AggressiveNSECCacheBase):
          self.assertEqual(nbQueries, self.getMetric('all-outqueries'))
  
      def testWildcard(self):
+        self.wipe()
  
          # first let's get the SOA and wildcard NSEC in our cache by asking a name that matches the wildcard
          # but a type that does not exist
@@ -297,6 +302,8 @@ class AggressiveNSECCacheNSEC3(AggressiveNSECCacheBase):
          self.assertEqual(nbQueries, self.getMetric('all-outqueries'))
  
      def test_OptOut(self):
+        self.wipe()
+
          # query a name in an opt-out zone
          res = self.sendQuery('ns2.optout.example.', 'A')
          self.assertRcodeEqual(res, dns.rcode.NXDOMAIN)
diff --git a/regression-tests.recursor-dnssec/test_RootNXTrust.py b/regression-tests.recursor-dnssec/test_RootNXTrust.py

index 62f6537e2c559f3831894feb83a360e95f29cb09..b9d1b0b1562f34e0fbce979a5a4670850be46407 100644 (file)
--- a/regression-tests.recursor-dnssec/test_RootNXTrust.py
+++ b/regression-tests.recursor-dnssec/test_RootNXTrust.py
@@ -46,6 +46,7 @@ webserver-port=%d
  webserver-address=127.0.0.1
  webserver-password=%s
  api-key=%s
+devonly-regression-test-mode
  """ % (_wsPort, _wsPassword, _apiKey)
  
      def testRootNXTrust(self):
@@ -94,6 +95,7 @@ webserver-port=%d
  webserver-address=127.0.0.1
  webserver-password=%s
  api-key=%s
+devonly-regression-test-mode
  """ % (_wsPort, _wsPassword, _apiKey)
  
      def testRootNXTrust(self):
diff --git a/regression-tests.recursor-dnssec/test_SimpleDoT.py b/regression-tests.recursor-dnssec/test_SimpleDoT.py

index 0994a47ec9c0e4709a6e970aa16ddf93f6a867d3..1026001d9ecc35a4e9ff832f6707808dd760ee2e 100644 (file)
--- a/regression-tests.recursor-dnssec/test_SimpleDoT.py
+++ b/regression-tests.recursor-dnssec/test_SimpleDoT.py
@@ -12,6 +12,7 @@ class testSimpleDoT(RecursorTest):
      _config_template = """
  dnssec=validate
  dot-to-auth-names=powerdns.com
+devonly-regression-test-mode
      """
  
      _roothints = None
diff --git a/regression-tests.recursor-dnssec/test_SimpleForwardOverDoT.py b/regression-tests.recursor-dnssec/test_SimpleForwardOverDoT.py

index 95e2fbca04033713058343eace766212d809c412..d839d43084ecec58e43d1ff642611ced797c72ed 100644 (file)
--- a/regression-tests.recursor-dnssec/test_SimpleForwardOverDoT.py
+++ b/regression-tests.recursor-dnssec/test_SimpleForwardOverDoT.py
@@ -12,6 +12,7 @@ class testSimpleForwardOverDoT(RecursorTest):
      _config_template = """
  dnssec=validate
  forward-zones-recurse=.=9.9.9.9:853
+devonly-regression-test-mode
      """
  
      @classmethod
author	Otto Moerbeek <otto.moerbeek@open-xchange.com>
	Tue, 8 Nov 2022 10:10:54 +0000 (11:10 +0100)
committer	Otto Moerbeek <otto.moerbeek@open-xchange.com>
	Mon, 12 Dec 2022 12:14:02 +0000 (13:14 +0100)
pdns/histogram.hh		patch \| blob \| blame \| history
pdns/lwres.cc		patch \| blob \| blame \| history
pdns/pdns_recursor.cc		patch \| blob \| blame \| history
pdns/rec_channel_rec.cc		patch \| blob \| blame \| history
pdns/recursordist/Makefile.am		patch \| blob \| blame \| history
pdns/recursordist/rec-main.cc		patch \| blob \| blame \| history
pdns/recursordist/rec-main.hh		patch \| blob \| blame \| history
pdns/recursordist/rec-tcounters.cc	[new file with mode: 0644]	patch \| blob
pdns/recursordist/rec-tcounters.hh	[new file with mode: 0644]	patch \| blob
pdns/recursordist/rec-tcp.cc		patch \| blob \| blame \| history
pdns/recursordist/tcounters.hh	[new symlink]	patch \| blob
pdns/recursordist/test-rec-tcounters_cc.cc	[new file with mode: 0644]	patch \| blob
pdns/recursordist/test-syncres_cc.cc		patch \| blob \| blame \| history
pdns/syncres.cc		patch \| blob \| blame \| history
pdns/syncres.hh		patch \| blob \| blame \| history
pdns/tcounters.hh	[new file with mode: 0644]	patch \| blob
regression-tests.recursor-dnssec/recursortests.py		patch \| blob \| blame \| history
regression-tests.recursor-dnssec/test_AggressiveNSECCache.py		patch \| blob \| blame \| history
regression-tests.recursor-dnssec/test_RootNXTrust.py		patch \| blob \| blame \| history
regression-tests.recursor-dnssec/test_SimpleDoT.py		patch \| blob \| blame \| history
regression-tests.recursor-dnssec/test_SimpleForwardOverDoT.py		patch \| blob \| blame \| history