Start using new Histogram class and refactor accounting to always

author Otto <otto.moerbeek@open-xchange.com>

Fri, 19 Feb 2021 09:24:48 +0000 (10:24 +0100)

committer Otto <otto.moerbeek@open-xchange.com>

Fri, 19 Feb 2021 10:05:39 +0000 (11:05 +0100)
author Otto <otto.moerbeek@open-xchange.com>
Fri, 19 Feb 2021 09:24:48 +0000 (10:24 +0100)
committer Otto <otto.moerbeek@open-xchange.com>
Fri, 19 Feb 2021 10:05:39 +0000 (11:05 +0100)
diff --git a/pdns/histogram.hh b/pdns/histogram.hh

index d5d32022958c983cdf3774c7d6d526ac47c726df..34f429d7c641763b93c57e9a5cb7d57155edeaf0 100644 (file)
--- a/pdns/histogram.hh
+++ b/pdns/histogram.hh
@@ -1,9 +1,29 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
  #pragma once
  
  #include <algorithm>
  #include <atomic>
  #include <vector>
-#include <sstream>
  
  namespace pdns {
  
@@ -11,8 +31,8 @@ namespace pdns {
  struct Bucket
  {
    std::string d_name;
-  uint64_t d_boundary;
-  uint64_t d_count;
+  uint64_t d_boundary{0};
+  uint64_t d_count{0};
  };
  
  inline bool operator<(uint64_t b, const Bucket& bu)
@@ -23,15 +43,14 @@ inline bool operator<(uint64_t b, const Bucket& bu)
  
  struct AtomicBucket
  {
-  // We need the constrcutors in this case, since atomics have a disabled
-  // copy constructor.
+  // We need the constructors in this case, since atomics have a disabled copy constructor.
    AtomicBucket() {}
    AtomicBucket(std::string name, uint64_t boundary, uint64_t val) : d_name(std::move(name)), d_boundary(boundary), d_count(val) {}
    AtomicBucket(const AtomicBucket& rhs) : d_name(rhs.d_name), d_boundary(rhs.d_boundary), d_count(rhs.d_count.load()) {}
  
    std::string d_name;
-  uint64_t d_boundary;
-  std::atomic<uint64_t> d_count;
+  uint64_t d_boundary{0};
+  std::atomic<uint64_t> d_count{0};
  };
  
  inline bool operator<(uint64_t b, const AtomicBucket& bu)
@@ -57,12 +76,10 @@ public:
      }
      d_buckets.reserve(boundaries.size() + 1);
      for (auto b: boundaries) {
-      // to_string gives too many .00000's
-      std::ostringstream str;
-      str << prefix << "le-" << b;
-      d_buckets.push_back(B{str.str(), b, 0});
+      std::string str = prefix + "le-" + std::to_string(b);
+      d_buckets.push_back(B{str, b, 0});
      }
-    // everything above last boundary, plus NaN, Inf etc
+    // everything above last boundary
      d_buckets.push_back(B{prefix + "le-max", std::numeric_limits<uint64_t>::max(), 0});
    }
  
diff --git a/pdns/misc.hh b/pdns/misc.hh

index a6c926da70a497707107bf291e49665db16e0e8b..3ee14f8ad782771bee9aae4d1d174946de40562f 100644 (file)
--- a/pdns/misc.hh
+++ b/pdns/misc.hh
@@ -312,9 +312,9 @@ inline float makeFloat(const struct timeval& tv)
  {
    return tv.tv_sec + tv.tv_usec/1000000.0f;
  }
-inline double makeDouble(const struct timeval& tv)
+inline uint64_t uSec(const struct timeval& tv)
  {
-  return tv.tv_sec + tv.tv_usec/1000000.0;
+  return tv.tv_sec * 1000000 + tv.tv_usec;
  }
  
  inline bool operator<(const struct timeval& lhs, const struct timeval& rhs)
diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc

index c330997aa55e501cd8b02a8600bc7021a894b973..81baa50558dce11e6923c88b41c3b08ddc08cf31 100644 (file)
--- a/pdns/pdns_recursor.cc
+++ b/pdns/pdns_recursor.cc
@@ -2142,11 +2142,13 @@ static void startDoResolve(void *p)
        finishTCPReply(dc, hadError, true);
      }
  
-    double spent = makeDouble(sr.getNow() - dc->d_now);
+    // Originally this code used a mix of floats, doubles, uint64_t with different units.
+    // Now it always uses an integral number of microseconds, expect for avarages, which are using doubles
+    uint64_t spentUsec = uSec(sr.getNow() - dc->d_now);
      if (!g_quiet) {
        g_log<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
        g_log<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
-       sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
+       sr.d_totUsec/1000.0<<" netw ms, "<< spentUsec/1000.0<<" tot ms, "<<
         sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
  
        if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
@@ -2162,41 +2164,17 @@ static void startDoResolve(void *p)
        g_recCache->cacheHits++;
      }
  
-    if (spent < 0.001)
-      g_stats.answers0_1++;
-    else if(spent < 0.01)
-      g_stats.answers1_10++;
-    else if(spent < 0.1)
-      g_stats.answers10_100++;
-    else if(spent < 1.0)
-      g_stats.answers100_1000++;
-    else
-      g_stats.answersSlow++;
+    g_stats.answers(spentUsec);
  
-    double newLat = spent * 1000000.0;
+    double newLat = spentUsec;
      newLat = min(newLat, g_networkTimeoutMsec * 1000.0); // outliers of several minutes exist..
      g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + newLat / g_latencyStatSize;
      // no worries, we do this for packet cache hits elsewhere
  
-    double ourtime = 1000.0 * spent - sr.d_totUsec / 1000.0; // in msec
-    if(ourtime < 1)
-      g_stats.ourtime0_1++;
-    else if(ourtime < 2)
-      g_stats.ourtime1_2++;
-    else if(ourtime < 4)
-      g_stats.ourtime2_4++;
-    else if(ourtime < 8)
-      g_stats.ourtime4_8++;
-    else if(ourtime < 16)
-      g_stats.ourtime8_16++;
-    else if(ourtime < 32)
-      g_stats.ourtime16_32++;
-    else {
-      g_stats.ourtimeSlow++;
-    }
-
-    if (ourtime >= 0) {
-      newLat = ourtime * 1000.0; // usec
+    if (spentUsec >= sr.d_totUsec) {
+      uint64_t ourtime = spentUsec - sr.d_totUsec; 
+      g_stats.ourtime(ourtime);
+      newLat = ourtime; // usec
        g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + newLat / g_latencyStatSize;
      }
  
@@ -2389,6 +2367,11 @@ static bool checkForCacheHit(bool qnameParsed, unsigned int tag, const string& d
      }
      g_stats.avgLatencyUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
      g_stats.avgLatencyOursUsec = (1.0 - 1.0 / g_latencyStatSize) * g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
+#if 0
+    // XXX changes behaviour compared to old code!
+    g_stats.answers(0);
+    g_stats.ourtime(0);
+#endif
    }
    return cacheHit;
  }
diff --git a/pdns/rec_channel_rec.cc b/pdns/rec_channel_rec.cc

index 78a08b7c5fcc551660e8006ff1e649666a243f92..be765cc090d756a46a4d132fb390fa10634703df 100644 (file)
--- a/pdns/rec_channel_rec.cc
+++ b/pdns/rec_channel_rec.cc
@@ -969,11 +969,6 @@ static uint64_t doGetCacheSize()
    return g_recCache->size();
  }
  
-static uint64_t doGetAvgLatencyUsec()
-{
-  return (uint64_t) g_stats.avgLatencyUsec;
-}
-
  static uint64_t doGetCacheBytes()
  {
    return g_recCache->bytes();
@@ -1075,34 +1070,33 @@ static void registerAllStats1()
    addGetStat("truncated-drops", &g_stats.truncatedDrops);
    addGetStat("query-pipe-full-drops", &g_stats.queryPipeFullDrops);
  
-  addGetStat("answers0-1", &g_stats.answers0_1);
-  addGetStat("answers1-10", &g_stats.answers1_10);
-  addGetStat("answers10-100", &g_stats.answers10_100);
-  addGetStat("answers100-1000", &g_stats.answers100_1000);
-  addGetStat("answers-slow", &g_stats.answersSlow);
-
-  addGetStat("x-ourtime0-1", &g_stats.ourtime0_1);
-  addGetStat("x-ourtime1-2", &g_stats.ourtime1_2);
-  addGetStat("x-ourtime2-4", &g_stats.ourtime2_4);
-  addGetStat("x-ourtime4-8", &g_stats.ourtime4_8);
-  addGetStat("x-ourtime8-16", &g_stats.ourtime8_16);
-  addGetStat("x-ourtime16-32", &g_stats.ourtime16_32);
-  addGetStat("x-ourtime-slow", &g_stats.ourtimeSlow);
-
-  addGetStat("auth4-answers0-1", &g_stats.auth4Answers0_1);
-  addGetStat("auth4-answers1-10", &g_stats.auth4Answers1_10);
-  addGetStat("auth4-answers10-100", &g_stats.auth4Answers10_100);
-  addGetStat("auth4-answers100-1000", &g_stats.auth4Answers100_1000);
-  addGetStat("auth4-answers-slow", &g_stats.auth4AnswersSlow);
-
-  addGetStat("auth6-answers0-1", &g_stats.auth6Answers0_1);
-  addGetStat("auth6-answers1-10", &g_stats.auth6Answers1_10);
-  addGetStat("auth6-answers10-100", &g_stats.auth6Answers10_100);
-  addGetStat("auth6-answers100-1000", &g_stats.auth6Answers100_1000);
-  addGetStat("auth6-answers-slow", &g_stats.auth6AnswersSlow);
-
-
-  addGetStat("qa-latency", doGetAvgLatencyUsec);
+  addGetStat("answers0-1", []() { return g_stats.answers.getCount(0); });
+  addGetStat("answers1-10", []() { return g_stats.answers.getCount(1); });
+  addGetStat("answers10-100", []() { return g_stats.answers.getCount(2); });
+  addGetStat("answers100-1000", []() { return g_stats.answers.getCount(3); });
+  addGetStat("answers-slow", []() { return g_stats.answers.getCount(4); });
+
+  addGetStat("x-ourtime0-1", []() { return g_stats.ourtime.getCount(0); });
+  addGetStat("x-ourtime1-2", []() { return g_stats.ourtime.getCount(1); });
+  addGetStat("x-ourtime2-4", []() { return g_stats.ourtime.getCount(2); });
+  addGetStat("x-ourtime4-8", []() { return g_stats.ourtime.getCount(3); });
+  addGetStat("x-ourtime8-16", []() { return g_stats.ourtime.getCount(4); });
+  addGetStat("x-ourtime16-32", []() { return g_stats.ourtime.getCount(5); });
+  addGetStat("x-ourtime-slow", []() { return g_stats.ourtime.getCount(6); });
+
+  addGetStat("auth4-answers0-1", []() { return g_stats.auth4Answers.getCount(0); });
+  addGetStat("auth4-answers1-10", []() { return g_stats.auth4Answers.getCount(1); });
+  addGetStat("auth4-answers10-100", []() { return g_stats.auth4Answers.getCount(2); });
+  addGetStat("auth4-answers100-1000", []() { return g_stats.auth4Answers.getCount(3); });
+  addGetStat("auth4-answers-slow", []() { return g_stats.auth4Answers.getCount(4); });
+
+  addGetStat("auth6-answers0-1", []() { return g_stats.auth6Answers.getCount(0); });
+  addGetStat("auth6-answers1-10", []() { return g_stats.auth6Answers.getCount(1); });
+  addGetStat("auth6-answers10-100", []() { return g_stats.auth6Answers.getCount(2); });
+  addGetStat("auth6-answers100-1000", []() { return g_stats.auth6Answers.getCount(3); });
+  addGetStat("auth6-answers-slow", []() { return g_stats.auth6Answers.getCount(4); });
+
+  addGetStat("qa-latency", []() { return g_stats.avgLatencyUsec.load(); });
    addGetStat("x-our-latency", []() { return g_stats.avgLatencyOursUsec.load(); });
    addGetStat("unexpected-packets", &g_stats.unexpectedCount);
    addGetStat("case-mismatches", &g_stats.caseMismatchCount);
diff --git a/pdns/recursordist/test-histogram_hh.cc b/pdns/recursordist/test-histogram_hh.cc

index 357c05a9f018f21915df3363fd974c61ef16933d..66e03e24091582b8ca0bdc88dc7d6ebda36217fa 100644 (file)
--- a/pdns/recursordist/test-histogram_hh.cc
+++ b/pdns/recursordist/test-histogram_hh.cc
@@ -1,3 +1,25 @@
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
  #define BOOST_TEST_DYN_LINK
  #define BOOST_TEST_NO_MAIN
  
diff --git a/pdns/syncres.cc b/pdns/syncres.cc

index b9b12c218ec2343babbad59937b65107b2f76636..07cd2eb5da472da16eea9141f98575955cc1ab19 100644 (file)
--- a/pdns/syncres.cc
+++ b/pdns/syncres.cc
@@ -98,32 +98,13 @@ unsigned int SyncRes::s_refresh_ttlperc;
  
  #define LOG(x) if(d_lm == Log) { g_log <<Logger::Warning << x; } else if(d_lm == Store) { d_trace << x; }
  
-static void accountAuthLatency(int usec, int family)
+static inline void accountAuthLatency(uint64_t usec, int family)
  {
-  if(family == AF_INET) {
-    if(usec < 1000)
-      g_stats.auth4Answers0_1++;
-    else if(usec < 10000)
-      g_stats.auth4Answers1_10++;
-    else if(usec < 100000)
-      g_stats.auth4Answers10_100++;
-    else if(usec < 1000000)
-      g_stats.auth4Answers100_1000++;
-    else
-      g_stats.auth4AnswersSlow++;
+  if (family == AF_INET) {
+    g_stats.auth4Answers(usec);
    } else  {
-    if(usec < 1000)
-      g_stats.auth6Answers0_1++;
-    else if(usec < 10000)
-      g_stats.auth6Answers1_10++;
-    else if(usec < 100000)
-      g_stats.auth6Answers10_100++;
-    else if(usec < 1000000)
-      g_stats.auth6Answers100_1000++;
-    else
-      g_stats.auth6AnswersSlow++;
+    g_stats.auth6Answers(usec);
    }
-
  }
  
  
@@ -867,7 +848,7 @@ int SyncRes::doResolveNoQNameMinimization(const DNSName &qname, const QType qtyp
            accountAuthLatency(lwr.d_usec, remoteIP.sin4.sin_family);
            if (fromCache)
              *fromCache = true;
-          
+
            // filter out the good stuff from lwr.result()
            if (resolveRet == LWResult::Result::Success) {
              for(const auto& rec : lwr.d_records) {
diff --git a/pdns/syncres.hh b/pdns/syncres.hh

index 7632e322c027dd64ec0bb68f9a2a5971e71e0573..0004754045463a335bd08acac977308d512e3a5c 100644 (file)
--- a/pdns/syncres.hh
+++ b/pdns/syncres.hh
@@ -51,6 +51,7 @@
  #include "negcache.hh"
  #include "proxy-protocol.hh"
  #include "sholder.hh"
+#include "histogram.hh"
  
  #ifdef HAVE_CONFIG_H
  #include "config.h"
@@ -976,10 +977,10 @@ struct RecursorStats
    std::atomic<uint64_t> servFails;
    std::atomic<uint64_t> nxDomains;
    std::atomic<uint64_t> noErrors;
-  std::atomic<uint64_t> answers0_1, answers1_10, answers10_100, answers100_1000, answersSlow;
-  std::atomic<uint64_t> auth4Answers0_1, auth4Answers1_10, auth4Answers10_100, auth4Answers100_1000, auth4AnswersSlow;
-  std::atomic<uint64_t> auth6Answers0_1, auth6Answers1_10, auth6Answers10_100, auth6Answers100_1000, auth6AnswersSlow;
-  std::atomic<uint64_t> ourtime0_1, ourtime1_2, ourtime2_4, ourtime4_8, ourtime8_16, ourtime16_32, ourtimeSlow;
+  pdns::AtomicHistogram<uint64_t> answers;
+  pdns::AtomicHistogram<uint64_t> auth4Answers;
+  pdns::AtomicHistogram<uint64_t> auth6Answers;
+  pdns::AtomicHistogram<uint64_t> ourtime;
    std::atomic<double> avgLatencyUsec;
    std::atomic<double> avgLatencyOursUsec;
    std::atomic<uint64_t> qcounter;     // not increased for unauth packets
@@ -1021,6 +1022,14 @@ struct RecursorStats
    std::atomic<uint64_t> rebalancedQueries{0};
    std::atomic<uint64_t> proxyProtocolInvalidCount{0};
    std::atomic<uint64_t> nodLookupsDroppedOversize{0};
+
+  RecursorStats() :
+    answers("answers", { 1000, 10000, 100000, 1000000 }),
+    auth4Answers("answers", { 1000, 10000, 100000, 1000000 }),
+    auth6Answers("answers", { 1000, 10000, 100000, 1000000 }),
+    ourtime("ourtime", { 1000, 2000, 4000, 8000, 16000, 32000 })
+  {
+  }
  };
  
  //! represents a running TCP/IP client session
author	Otto <otto.moerbeek@open-xchange.com>
	Fri, 19 Feb 2021 09:24:48 +0000 (10:24 +0100)
committer	Otto <otto.moerbeek@open-xchange.com>
	Fri, 19 Feb 2021 10:05:39 +0000 (11:05 +0100)
pdns/histogram.hh		patch \| blob \| blame \| history
pdns/misc.hh		patch \| blob \| blame \| history
pdns/pdns_recursor.cc		patch \| blob \| blame \| history
pdns/rec_channel_rec.cc		patch \| blob \| blame \| history
pdns/recursordist/test-histogram_hh.cc		patch \| blob \| blame \| history
pdns/syncres.cc		patch \| blob \| blame \| history
pdns/syncres.hh		patch \| blob \| blame \| history