]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
This implements a clean re-do of an earlier PR to add the glorious log-log histograms...
authorbert hubert <bert.hubert@netherlabs.nl>
Fri, 14 Sep 2018 09:37:34 +0000 (11:37 +0200)
committerbert hubert <bert.hubert@netherlabs.nl>
Fri, 14 Sep 2018 09:52:43 +0000 (11:52 +0200)
As a side effect, 'dnsscope' no longer uses MOADNSParser which makes it both faster and more resilient. There are other speedups as well.

docs/manpages/dnsscope.1.rst
pdns/Makefile.am
pdns/anadns.hh
pdns/dnsscope.cc
pdns/histog.hh [new file with mode: 0644]

index aa96e845dbe6d7fc666fb7efced17cfdd5caa154..cbf0a55d9d7a3e0201172440e5b8f63bd7bfb132 100644 (file)
@@ -23,6 +23,9 @@ INFILE
                                        flag set. By default, we process all DNS packets in *INFILE*.
 --ipv4=<state>                         Process IPv4 packets. On by default, disable with **--ipv4 false**.
 --ipv6=<state>                         Process IPv6 packets. On by default, disable with **--ipv6 false**.
+--full-histogram <msec>                Write out histogram with specified bin-size to 'full-histogram'
+--log-histogram                        Write out a log-histogram of response times to 'log-histogram'
+--no-servfail-stats                    Remove servfail responses from latency statistics
 --servfail-tree                        Figure out subtrees that generate servfails.
 -l, --load-stats                       Emit per-second load statistics (questions, answers, outstanding).
 -w <file>, --write-failures <file>     Write weird packets to a PCAP file at *FILENAME*.
index dfb1940859ee6f9605c166ec45180cc8eab073fa..ae3cb9f4cd13b14ac22279e073bc1a5975b0fe6a 100644 (file)
@@ -977,6 +977,7 @@ dnsreplay_SOURCES = \
        dnswriter.cc dnswriter.hh \
        ednsoptions.cc ednsoptions.hh \
        ednssubnet.cc ednssubnet.hh \
+       histog.hh \
        iputils.cc \
        logger.cc \
        misc.cc \
index 3a3b8abb0922495bfc146134844510af9508926c..34657684ef782fa9efd747c2a7492f26ff72cb1a 100644 (file)
@@ -38,16 +38,17 @@ struct QuestionIdentifier
   bool operator<(const QuestionIdentifier& rhs) const
   {
     return 
-      tie(d_source, d_dest, d_qname, d_qtype, d_id) < 
-      tie(rhs.d_source, rhs.d_dest, rhs.d_qname, rhs.d_qtype, rhs.d_id);
+      tie(d_id,         d_qtype,     d_source,     d_dest,     d_qname) < 
+      tie(rhs.d_id, rhs.d_qtype, rhs.d_source, rhs.d_dest, rhs.d_qname);
   }
 
+
   // the canonical direction is that of the question
-  static QuestionIdentifier create(const ComboAddress& src, const ComboAddress& dst, const MOADNSParser& mdp)
+  static QuestionIdentifier create(const ComboAddress& src, const ComboAddress& dst, const struct dnsheader& header, const DNSName& qname, uint16_t qtype)
   {
     QuestionIdentifier ret;
 
-    if(mdp.d_header.qr) {
+    if(header.qr) {
       ret.d_source = dst;
       ret.d_dest = src;
     }
@@ -55,12 +56,19 @@ struct QuestionIdentifier
       ret.d_source = src;
       ret.d_dest = dst;
     }
-    ret.d_qname=mdp.d_qname;
-    ret.d_qtype=mdp.d_qtype;
-    ret.d_id=mdp.d_header.id;
+    ret.d_qname=qname;
+    ret.d_qtype=qtype;
+    ret.d_id=ntohs(header.id);
     return ret;
   }
 
+  // the canonical direction is that of the question
+  static QuestionIdentifier create(const ComboAddress& src, const ComboAddress& dst, const MOADNSParser& mdp)
+  {
+    return create(src, dst, mdp.d_header, mdp.d_qname, mdp.d_qtype);
+  }
+
+  
   ComboAddress d_source, d_dest;
 
   DNSName d_qname;
index 7627b973d58e7e9f82845ab4819a02060a91abc4..cd84b63a3a20b60ac9ec502afcc716b370d97e49 100644 (file)
@@ -23,6 +23,8 @@
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
+#include "histog.hh"
+
 #include "statbag.hh"
 #include "dnspcap.hh"
 #include "dnsparser.hh"
 #include <algorithm>
 #include "anadns.hh"
 #include <boost/program_options.hpp>
-
+#include <unordered_set>
 #include <boost/logic/tribool.hpp>
 #include "arguments.hh"
 #include "namespaces.hh"
-#include <deque>
 #include "dnsrecords.hh"
 #include "statnode.hh"
 
@@ -120,6 +121,13 @@ void visitor(const StatNode* node, const StatNode::Stat& selfstat, const StatNod
   }
 }
 
+const struct timeval operator-(const struct pdns_timeval& lhs, const struct pdns_timeval& rhs)
+{
+  struct timeval a{lhs.tv_sec, lhs.tv_usec}, b{rhs.tv_sec, rhs.tv_usec};
+  return operator-(a,b);
+}
+
+
 int main(int argc, char** argv)
 try
 {
@@ -130,8 +138,12 @@ try
     ("rd", po::value<bool>(), "If set to true, only process RD packets, to false only non-RD, unset: both")
     ("ipv4", po::value<bool>()->default_value(true), "Process IPv4 packets")
     ("ipv6", po::value<bool>()->default_value(true), "Process IPv6 packets")
-    ("servfail-tree", "Figure out subtrees that generate servfails")
+    ("log-histogram", "Write a log-histogram to file 'log-histogram'")
+    ("full-histogram", po::value<double>(), "Write a log-histogram to file 'full-histogram' with this millisecond bin size")
     ("load-stats,l", po::value<string>()->default_value(""), "if set, emit per-second load statistics (questions, answers, outstanding)")
+    ("no-servfail-stats", "Don't include servfails in response time stats")
+    ("servfail-tree", "Figure out subtrees that generate servfails")
+    ("stats-dir", po::value<string>()->default_value("."), "Directory where statistics will be saved")
     ("write-failures,w", po::value<string>()->default_value(""), "if set, write weird packets to this PCAP file")
     ("verbose,v", "be verbose");
     
@@ -156,7 +168,7 @@ try
   }
 
   if(files.empty() || g_vm.count("help")) {
-    cerr<<"Syntax: dnsscope filename.pcap"<<endl;
+    cerr<<"Syntax: dnsscope filename.pcap [filenam2.pcap...]"<<endl;
     cout << desc << endl;
     exit(0);
   }
@@ -177,7 +189,8 @@ try
   bool doIPv4 = g_vm["ipv4"].as<bool>();
   bool doIPv6 = g_vm["ipv6"].as<bool>();
   bool doServFailTree = g_vm.count("servfail-tree");
-  int dnserrors=0, bogus=0;
+  bool noservfailstats = g_vm.count("no-servfail-stats");
+  int dnserrors=0, parsefail=0;
   typedef map<uint32_t,uint32_t> cumul_t;
   cumul_t cumul;
   unsigned int untracked=0, errorresult=0, reallylate=0, nonRDQueries=0, queries=0;
@@ -185,16 +198,18 @@ try
   unsigned int answers=0, nonDNSIP=0, rdFilterMismatch=0;
   unsigned int dnssecOK=0, edns=0;
   unsigned int dnssecCD=0, dnssecAD=0;
+  unsigned int reuses=0;
   typedef map<uint16_t,uint32_t> rcodes_t;
   rcodes_t rcodes;
   
   time_t lowestTime=2000000000, highestTime=0;
   time_t lastsec=0;
   LiveCounts lastcounts;
-  set<ComboAddress, ComboAddress::addressOnlyLessThan> requestors, recipients, rdnonra;
+  std::unordered_set<ComboAddress, ComboAddress::addressOnlyHash> requestors, recipients, rdnonra;
   typedef vector<pair<time_t, LiveCounts> > pcounts_t;
   pcounts_t pcounts;
   OPTRecordContent::report();
+
   for(unsigned int fno=0; fno < files.size(); ++fno) {
     PcapPacketReader pr(files[fno]);
     PcapPacketWriter* pw=0;
@@ -217,22 +232,28 @@ try
              continue;
            }
          }
-         MOADNSParser mdp(false, (const char*)pr.d_payload, pr.d_len);
-         if(haveRDFilter && mdp.d_header.rd != rdFilter) {
+         uint16_t qtype;
+         DNSName qname((const char*)pr.d_payload, pr.d_len, 12, false, &qtype);
+         struct dnsheader header;
+         memcpy(&header, (struct dnsheader*)pr.d_payload, 12);
+
+         if(haveRDFilter && header.rd != rdFilter) {
            rdFilterMismatch++;
            continue;
          }
 
-         if(!mdp.d_header.qr && getEDNSOpts(mdp, &edo)) {
-           edns++;
-           if(edo.d_extFlags & EDNSOpts::DNSSECOK)
-             dnssecOK++;
-           if(mdp.d_header.cd)
-             dnssecCD++;
-           if(mdp.d_header.ad)
-             dnssecAD++;
-         }
-         
+         if(!header.qr) {
+            uint16_t udpsize, z;
+            if(getEDNSUDPPayloadSizeAndZ((const char*)pr.d_payload, pr.d_len, &udpsize, &z)) {
+              edns++;
+              if(z & EDNSOpts::DNSSECOK)
+                dnssecOK++;
+              if(header.cd)
+                dnssecCD++;
+              if(header.ad)
+                dnssecAD++;
+            }
+          }
 
          if(pr.d_ip->ip_v == 4) 
            ++ipv4DNSPackets;
@@ -257,12 +278,11 @@ try
          lowestTime=min((time_t)lowestTime,  (time_t)pr.d_pheader.ts.tv_sec);
          highestTime=max((time_t)highestTime, (time_t)pr.d_pheader.ts.tv_sec);
 
-         string name=mdp.d_qname.toString()+"|"+DNSRecordContent::NumberToType(mdp.d_qtype);
-        
-         QuestionIdentifier qi=QuestionIdentifier::create(pr.getSource(), pr.getDest(), mdp);
+         QuestionIdentifier qi=QuestionIdentifier::create(pr.getSource(), pr.getDest(), header, qname, qtype);
 
-         if(!mdp.d_header.qr) { // question
-           if(!mdp.d_header.rd)
+         if(!header.qr) { // question
+           //      cout<<"Query "<<qi<<endl;
+           if(!header.rd)
              nonRDQueries++;
            queries++;
 
@@ -270,64 +290,71 @@ try
            rem.sin4.sin_port=0;
            requestors.insert(rem);       
 
-           QuestionData& qd=statmap[qi];
+            QuestionData& qd=statmap[qi];
           
            if(!qd.d_firstquestiontime.tv_sec)
              qd.d_firstquestiontime=pr.d_pheader.ts;
-           qd.d_qcount++;
+           else {
+             auto delta=makeFloat(pr.d_pheader.ts - qd.d_firstquestiontime);
+             //              cout<<"Reuse of "<<qi<<", delta t="<<delta<<", count="<<qd.d_qcount<<endl;
+             if(delta > 2.0) {
+               //              cout<<"Resetting old entry for "<<qi<<", too old"<<endl;
+               qd.d_qcount=0;
+               qd.d_answercount=0;
+               qd.d_firstquestiontime=pr.d_pheader.ts;
+             }
+           }
+           if(qd.d_qcount++)
+              reuses++;
          }
          else  {  // answer
-           rcodes[mdp.d_header.rcode]++;
+           //      cout<<"Response "<<qi<<endl;
+           rcodes[header.rcode]++;
            answers++;
-           if(mdp.d_header.rd && !mdp.d_header.ra) {
+           if(header.rd && !header.ra) {
              rdNonRAAnswers++;
              rdnonra.insert(pr.getDest());
            }
          
-           if(mdp.d_header.ra) {
+           if(header.ra) {
              ComboAddress rem = pr.getDest();
              rem.sin4.sin_port=0;
              recipients.insert(rem);     
            }
 
            QuestionData& qd=statmap[qi];
-
-           if(!qd.d_qcount)
+           if(!qd.d_qcount) {
+             //              cout<<"Untracked answer: "<<qi<<endl;
              untracked++;
+           }
 
            qd.d_answercount++;
 
            if(qd.d_qcount) {
              uint32_t usecs= (pr.d_pheader.ts.tv_sec - qd.d_firstquestiontime.tv_sec) * 1000000 +  
                (pr.d_pheader.ts.tv_usec - qd.d_firstquestiontime.tv_usec) ;
-             //            cout<<"Took: "<<usecs<<"usec\n";
-             if(usecs<2049000)
-               cumul[usecs]++;
-             else
-               reallylate++;
+
+             //              cout<<"Usecs for "<<qi<<": "<<usecs<<endl;
+              if(!noservfailstats || header.rcode != 2)
+                cumul[usecs]++;
             
-             if(mdp.d_header.rcode != 0 && mdp.d_header.rcode!=3) 
+             if(header.rcode != 0 && header.rcode!=3) 
                errorresult++;
              ComboAddress rem = pr.getDest();
              rem.sin4.sin_port=0;
 
              if(doServFailTree)
-               root.submit(mdp.d_qname, mdp.d_header.rcode, rem);
+               root.submit(qname, header.rcode, rem);
            }
 
-           if(!qd.d_qcount || qd.d_qcount == qd.d_answercount)
+           if(!qd.d_qcount || qd.d_qcount == qd.d_answercount) {
+             //              cout<<"Clearing state for "<<qi<<endl<<endl;
              statmap.erase(qi);
+           }
+           else
+             ;//             cout<<"State for qi remains open, qcount="<<qd.d_qcount<<", answercount="<<qd.d_answercount<<endl;
+            
          }
-
-        
-       }
-       catch(MOADNSException& mde) {
-         if(verbose)
-           cout<<"error parsing packet: "<<mde.what()<<endl;
-         if(pw)
-           pw->write();
-         dnserrors++;
-         continue;
        }
        catch(std::exception& e) {
          if(verbose)
@@ -335,7 +362,7 @@ try
 
          if(pw)
            pw->write();
-         bogus++;
+         parsefail++;
          continue;
        }
       }
@@ -346,13 +373,22 @@ try
     cout<<"PCAP contained "<<pr.d_correctpackets<<" correct packets, "<<pr.d_runts<<" runts, "<< pr.d_oversized<<" oversize, "<<pr.d_nonetheripudp<<" non-UDP.\n";
 
   }
+
+  /*
+  cout<<"Open when done: "<<endl;
+  for(const auto& a : statmap) {
+    cout<<a.first<<": qcount="<<a.second.d_qcount<<", answercount="<<a.second.d_answercount<<endl;
+  }
+  */
+  
   cout<<"Timespan: "<<(highestTime-lowestTime)/3600.0<<" hours"<<endl;
 
-  cout<<nonDNSIP<<" non-DNS UDP, "<<dnserrors<<" dns decoding errors, "<<bogus<<" bogus packets"<<endl;
+  cout<<nonDNSIP<<" non-DNS UDP, "<<dnserrors<<" dns decoding errors, "<<parsefail<<" packets failed to parse"<<endl;
   cout<<"Ignored fragment packets: "<<fragmented<<endl;
   cout<<"Dropped DNS packets based on recursion-desired filter: "<<rdFilterMismatch<<endl;
   cout<<"DNS IPv4: "<<ipv4DNSPackets<<" packets, IPv6: "<<ipv6DNSPackets<<" packets"<<endl;
   cout<<"Questions: "<<queries<<", answers: "<<answers<<endl;
+  cout<<"Reuses of same state entry: "<<reuses<<endl;
   unsigned int unanswered=0;
 
 
@@ -388,26 +424,32 @@ try
   
   typedef map<uint32_t, bool> done_t;
   done_t done;
-  done[50];
-  done[100];
-  done[200];
-  done[300];
-  done[400];
-  done[800];
-  done[1000];
-  done[2000];
-  done[4000];
-  done[8000];
-  done[32000];
-  done[64000];
-  done[256000];
-  done[1024000];
-  done[2048000];
+  for(auto a : {50, 100, 200, 300, 400, 800, 1000, 2000, 4000, 8000, 32000, 64000, 256000, 1024000, 2048000})
+    done[a]=false;
 
   cout.setf(std::ios::fixed);
-  cout.precision(2);
+  cout.precision(4);
   sum=0;
+
+  if(g_vm.count("log-histogram")) {
+    string fname = g_vm["stats-dir"].as<string>()+"/log-histogram";
+    ofstream loglog(fname);
+    if(!loglog)
+      throw runtime_error("Unable to write statistics to "+fname);
+
+    writeLogHistogramFile(cumul, loglog);
+  }
+
+  if(g_vm.count("full-histogram")) {
+    string fname=g_vm["stats-dir"].as<string>()+"/full-histogram";
+    ofstream loglog(fname);
+    if(!loglog)
+      throw runtime_error("Unable to write statistics to "+fname);
+    writeFullHistogramFile(cumul, g_vm["full-histogram"].as<double>(), loglog);
+  }
+
   
+  sum=0;
   double lastperc=0, perc=0;
   for(cumul_t::const_iterator i=cumul.begin(); i!=cumul.end(); ++i) {
     sum+=i->second;
diff --git a/pdns/histog.hh b/pdns/histog.hh
new file mode 100644 (file)
index 0000000..fb1ce6c
--- /dev/null
@@ -0,0 +1,105 @@
+#pragma once
+#ifdef L
+#error "Include this file BEFORE including logger.hh"
+#endif
+
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics.hpp>
+
+#include <vector>
+#include <fstream>
+#include <deque>
+#include <map>
+
+struct LogHistogramBin
+{
+  double percentile;
+  double latLimit;
+  double latAverage;
+  double latMedian;
+  double latStddev;
+  uint64_t count;
+  double cumulLatAverage;
+  double cumulLatMedian;
+};
+
+template<typename T>
+std::vector<LogHistogramBin> createLogHistogram(const T& bins,
+                        std::deque<double> percentiles={0.001, 0.01, 0.1, 0.2, 0.5, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 94, 95, 96, 97, 97.5, 98, 98.5, 99, 99.5, 99.6, 99.9, 99.99, 99.999, 99.9999})
+{
+  uint64_t totcumul=0, sum=0;
+
+  for(const auto& c: bins) {
+    totcumul += c.second;
+  }
+
+  namespace ba=boost::accumulators;
+  ba::accumulator_set<double, ba::features<ba::tag::mean, ba::tag::median, ba::tag::variance>, unsigned> acc;
+
+  ba::accumulator_set<double, ba::features<ba::tag::mean, ba::tag::median, ba::tag::variance>, unsigned int> cumulstats;
+
+  uint64_t bincount=0;
+  std::vector<LogHistogramBin> ret;
+  for(const auto& c: bins) {
+    if(percentiles.empty())
+      break;
+    sum += c.second;
+    bincount += c.second;
+      
+    acc(c.first/1000.0, ba::weight=c.second);
+    for(unsigned int i=0; i < c.second; ++i)
+      cumulstats(c.first/1000.0, ba::weight=1); // "weighted" does not work for median
+    if(sum > percentiles.front() * totcumul / 100.0) {
+      ret.push_back({100.0-percentiles.front(), (double)c.first/1000.0, ba::mean(acc), ba::median(acc), sqrt(ba::variance(acc)), bincount, ba::mean(cumulstats), ba::median(cumulstats)});
+      
+      percentiles.pop_front();
+      acc=decltype(acc)();
+      bincount=0;
+    }
+  }
+  std::sort(ret.begin(), ret.end(), [](const LogHistogramBin& a, const LogHistogramBin& b) {
+      return a.percentile < b.percentile;
+    });
+  return ret;
+}
+
+template<typename T>
+void writeLogHistogramFile(const T& bins, std::ostream& out, std::deque<double> percentiles={0.001, 0.01, 0.1, 0.2, 0.5, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 94, 95, 96, 97, 97.5, 98, 98.5, 99, 99.5, 99.6, 99.9, 99.99, 99.999, 99.9999} )
+{
+
+  auto vec = createLogHistogram(bins, percentiles);
+  out<< R"(# set logscale xy
+# set mxtics 10
+# set mytics 10
+# set grid xtics
+# set grid ytics
+# set xlabel "Slowest percentile"
+# set ylabel "Millisecond response time"
+# set terminal svg
+# set output 'log-histogram.svg'
+# plot 'log-histogram' using 1:2 with linespoints title 'Average latency per percentile', \
+#      'log-histogram' using 1:6 with linespoints title 'Cumulative average latency', \
+#      'log-histogram' using 1:7 with linespoints title 'Cumulative median latency')"<<"\n";
+
+  out<<"# slow-percentile usec-latency-mean usec-latency-max usec-latency-median usec-latency-stddev usec-latency-cumul usec-latency-median-cumul num-queries\n";
+  
+  
+  for(const auto& e : vec) {
+    out<<e.percentile<<" "<<e.latAverage<<" "<<e.latLimit<<" "<<e.latMedian<<" "<<e.latStddev<<" "<<e.cumulLatAverage<<" "<<e.cumulLatMedian<<" "<<e.count<<"\n";
+  }
+  out.flush();
+}
+
+template<typename T>
+void writeFullHistogramFile(const T& bins, double binMsec, std::ofstream& out)
+{
+  std::map<unsigned int, uint64_t> reducedBins;
+  for(const auto& b : bins) {
+    reducedBins[b.first/(1000.0*binMsec)]+=b.second;
+  }
+  out<<"# msec-bin-low count\n";
+  for(const auto& rb : reducedBins) {
+    out<<rb.first*binMsec<<" "<<rb.second<<"\n";
+  }
+  out.flush();
+}