From: bert hubert Date: Fri, 14 Sep 2018 09:37:34 +0000 (+0200) Subject: This implements a clean re-do of an earlier PR to add the glorious log-log histograms... X-Git-Tag: dnsdist-1.3.3~59^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ef890b79fd411209cb2109c23a5d327fedce5dab;p=thirdparty%2Fpdns.git This implements a clean re-do of an earlier PR to add the glorious log-log histograms from https://blog.powerdns.com/2017/11/02/dns-performance-metrics-the-logarithmic-percentile-histogram/ As a side effect, 'dnsscope' no longer uses MOADNSParser which makes it both faster and more resilient. There are other speedups as well. --- diff --git a/docs/manpages/dnsscope.1.rst b/docs/manpages/dnsscope.1.rst index aa96e845db..cbf0a55d9d 100644 --- a/docs/manpages/dnsscope.1.rst +++ b/docs/manpages/dnsscope.1.rst @@ -23,6 +23,9 @@ INFILE flag set. By default, we process all DNS packets in *INFILE*. --ipv4= Process IPv4 packets. On by default, disable with **--ipv4 false**. --ipv6= Process IPv6 packets. On by default, disable with **--ipv6 false**. +--full-histogram Write out histogram with specified bin-size to 'full-histogram' +--log-histogram Write out a log-histogram of response times to 'log-histogram' +--no-servfail-stats Remove servfail responses from latency statistics --servfail-tree Figure out subtrees that generate servfails. -l, --load-stats Emit per-second load statistics (questions, answers, outstanding). -w , --write-failures Write weird packets to a PCAP file at *FILENAME*. diff --git a/pdns/Makefile.am b/pdns/Makefile.am index dfb1940859..ae3cb9f4cd 100644 --- a/pdns/Makefile.am +++ b/pdns/Makefile.am @@ -977,6 +977,7 @@ dnsreplay_SOURCES = \ dnswriter.cc dnswriter.hh \ ednsoptions.cc ednsoptions.hh \ ednssubnet.cc ednssubnet.hh \ + histog.hh \ iputils.cc \ logger.cc \ misc.cc \ diff --git a/pdns/anadns.hh b/pdns/anadns.hh index 3a3b8abb09..34657684ef 100644 --- a/pdns/anadns.hh +++ b/pdns/anadns.hh @@ -38,16 +38,17 @@ struct QuestionIdentifier bool operator<(const QuestionIdentifier& rhs) const { return - tie(d_source, d_dest, d_qname, d_qtype, d_id) < - tie(rhs.d_source, rhs.d_dest, rhs.d_qname, rhs.d_qtype, rhs.d_id); + tie(d_id, d_qtype, d_source, d_dest, d_qname) < + tie(rhs.d_id, rhs.d_qtype, rhs.d_source, rhs.d_dest, rhs.d_qname); } + // the canonical direction is that of the question - static QuestionIdentifier create(const ComboAddress& src, const ComboAddress& dst, const MOADNSParser& mdp) + static QuestionIdentifier create(const ComboAddress& src, const ComboAddress& dst, const struct dnsheader& header, const DNSName& qname, uint16_t qtype) { QuestionIdentifier ret; - if(mdp.d_header.qr) { + if(header.qr) { ret.d_source = dst; ret.d_dest = src; } @@ -55,12 +56,19 @@ struct QuestionIdentifier ret.d_source = src; ret.d_dest = dst; } - ret.d_qname=mdp.d_qname; - ret.d_qtype=mdp.d_qtype; - ret.d_id=mdp.d_header.id; + ret.d_qname=qname; + ret.d_qtype=qtype; + ret.d_id=ntohs(header.id); return ret; } + // the canonical direction is that of the question + static QuestionIdentifier create(const ComboAddress& src, const ComboAddress& dst, const MOADNSParser& mdp) + { + return create(src, dst, mdp.d_header, mdp.d_qname, mdp.d_qtype); + } + + ComboAddress d_source, d_dest; DNSName d_qname; diff --git a/pdns/dnsscope.cc b/pdns/dnsscope.cc index 7627b973d5..cd84b63a3a 100644 --- a/pdns/dnsscope.cc +++ b/pdns/dnsscope.cc @@ -23,6 +23,8 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif +#include "histog.hh" + #include "statbag.hh" #include "dnspcap.hh" #include "dnsparser.hh" @@ -35,11 +37,10 @@ #include #include "anadns.hh" #include - +#include #include #include "arguments.hh" #include "namespaces.hh" -#include #include "dnsrecords.hh" #include "statnode.hh" @@ -120,6 +121,13 @@ void visitor(const StatNode* node, const StatNode::Stat& selfstat, const StatNod } } +const struct timeval operator-(const struct pdns_timeval& lhs, const struct pdns_timeval& rhs) +{ + struct timeval a{lhs.tv_sec, lhs.tv_usec}, b{rhs.tv_sec, rhs.tv_usec}; + return operator-(a,b); +} + + int main(int argc, char** argv) try { @@ -130,8 +138,12 @@ try ("rd", po::value(), "If set to true, only process RD packets, to false only non-RD, unset: both") ("ipv4", po::value()->default_value(true), "Process IPv4 packets") ("ipv6", po::value()->default_value(true), "Process IPv6 packets") - ("servfail-tree", "Figure out subtrees that generate servfails") + ("log-histogram", "Write a log-histogram to file 'log-histogram'") + ("full-histogram", po::value(), "Write a log-histogram to file 'full-histogram' with this millisecond bin size") ("load-stats,l", po::value()->default_value(""), "if set, emit per-second load statistics (questions, answers, outstanding)") + ("no-servfail-stats", "Don't include servfails in response time stats") + ("servfail-tree", "Figure out subtrees that generate servfails") + ("stats-dir", po::value()->default_value("."), "Directory where statistics will be saved") ("write-failures,w", po::value()->default_value(""), "if set, write weird packets to this PCAP file") ("verbose,v", "be verbose"); @@ -156,7 +168,7 @@ try } if(files.empty() || g_vm.count("help")) { - cerr<<"Syntax: dnsscope filename.pcap"<(); bool doIPv6 = g_vm["ipv6"].as(); bool doServFailTree = g_vm.count("servfail-tree"); - int dnserrors=0, bogus=0; + bool noservfailstats = g_vm.count("no-servfail-stats"); + int dnserrors=0, parsefail=0; typedef map cumul_t; cumul_t cumul; unsigned int untracked=0, errorresult=0, reallylate=0, nonRDQueries=0, queries=0; @@ -185,16 +198,18 @@ try unsigned int answers=0, nonDNSIP=0, rdFilterMismatch=0; unsigned int dnssecOK=0, edns=0; unsigned int dnssecCD=0, dnssecAD=0; + unsigned int reuses=0; typedef map rcodes_t; rcodes_t rcodes; time_t lowestTime=2000000000, highestTime=0; time_t lastsec=0; LiveCounts lastcounts; - set requestors, recipients, rdnonra; + std::unordered_set requestors, recipients, rdnonra; typedef vector > pcounts_t; pcounts_t pcounts; OPTRecordContent::report(); + for(unsigned int fno=0; fno < files.size(); ++fno) { PcapPacketReader pr(files[fno]); PcapPacketWriter* pw=0; @@ -217,22 +232,28 @@ try continue; } } - MOADNSParser mdp(false, (const char*)pr.d_payload, pr.d_len); - if(haveRDFilter && mdp.d_header.rd != rdFilter) { + uint16_t qtype; + DNSName qname((const char*)pr.d_payload, pr.d_len, 12, false, &qtype); + struct dnsheader header; + memcpy(&header, (struct dnsheader*)pr.d_payload, 12); + + if(haveRDFilter && header.rd != rdFilter) { rdFilterMismatch++; continue; } - if(!mdp.d_header.qr && getEDNSOpts(mdp, &edo)) { - edns++; - if(edo.d_extFlags & EDNSOpts::DNSSECOK) - dnssecOK++; - if(mdp.d_header.cd) - dnssecCD++; - if(mdp.d_header.ad) - dnssecAD++; - } - + if(!header.qr) { + uint16_t udpsize, z; + if(getEDNSUDPPayloadSizeAndZ((const char*)pr.d_payload, pr.d_len, &udpsize, &z)) { + edns++; + if(z & EDNSOpts::DNSSECOK) + dnssecOK++; + if(header.cd) + dnssecCD++; + if(header.ad) + dnssecAD++; + } + } if(pr.d_ip->ip_v == 4) ++ipv4DNSPackets; @@ -257,12 +278,11 @@ try lowestTime=min((time_t)lowestTime, (time_t)pr.d_pheader.ts.tv_sec); highestTime=max((time_t)highestTime, (time_t)pr.d_pheader.ts.tv_sec); - string name=mdp.d_qname.toString()+"|"+DNSRecordContent::NumberToType(mdp.d_qtype); - - QuestionIdentifier qi=QuestionIdentifier::create(pr.getSource(), pr.getDest(), mdp); + QuestionIdentifier qi=QuestionIdentifier::create(pr.getSource(), pr.getDest(), header, qname, qtype); - if(!mdp.d_header.qr) { // question - if(!mdp.d_header.rd) + if(!header.qr) { // question + // cout<<"Query "<write(); - dnserrors++; - continue; } catch(std::exception& e) { if(verbose) @@ -335,7 +362,7 @@ try if(pw) pw->write(); - bogus++; + parsefail++; continue; } } @@ -346,13 +373,22 @@ try cout<<"PCAP contained "< done_t; done_t done; - done[50]; - done[100]; - done[200]; - done[300]; - done[400]; - done[800]; - done[1000]; - done[2000]; - done[4000]; - done[8000]; - done[32000]; - done[64000]; - done[256000]; - done[1024000]; - done[2048000]; + for(auto a : {50, 100, 200, 300, 400, 800, 1000, 2000, 4000, 8000, 32000, 64000, 256000, 1024000, 2048000}) + done[a]=false; cout.setf(std::ios::fixed); - cout.precision(2); + cout.precision(4); sum=0; + + if(g_vm.count("log-histogram")) { + string fname = g_vm["stats-dir"].as()+"/log-histogram"; + ofstream loglog(fname); + if(!loglog) + throw runtime_error("Unable to write statistics to "+fname); + + writeLogHistogramFile(cumul, loglog); + } + + if(g_vm.count("full-histogram")) { + string fname=g_vm["stats-dir"].as()+"/full-histogram"; + ofstream loglog(fname); + if(!loglog) + throw runtime_error("Unable to write statistics to "+fname); + writeFullHistogramFile(cumul, g_vm["full-histogram"].as(), loglog); + } + + sum=0; double lastperc=0, perc=0; for(cumul_t::const_iterator i=cumul.begin(); i!=cumul.end(); ++i) { sum+=i->second; diff --git a/pdns/histog.hh b/pdns/histog.hh new file mode 100644 index 0000000000..fb1ce6c9c9 --- /dev/null +++ b/pdns/histog.hh @@ -0,0 +1,105 @@ +#pragma once +#ifdef L +#error "Include this file BEFORE including logger.hh" +#endif + +#include +#include + +#include +#include +#include +#include + +struct LogHistogramBin +{ + double percentile; + double latLimit; + double latAverage; + double latMedian; + double latStddev; + uint64_t count; + double cumulLatAverage; + double cumulLatMedian; +}; + +template +std::vector createLogHistogram(const T& bins, + std::deque percentiles={0.001, 0.01, 0.1, 0.2, 0.5, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 94, 95, 96, 97, 97.5, 98, 98.5, 99, 99.5, 99.6, 99.9, 99.99, 99.999, 99.9999}) +{ + uint64_t totcumul=0, sum=0; + + for(const auto& c: bins) { + totcumul += c.second; + } + + namespace ba=boost::accumulators; + ba::accumulator_set, unsigned> acc; + + ba::accumulator_set, unsigned int> cumulstats; + + uint64_t bincount=0; + std::vector ret; + for(const auto& c: bins) { + if(percentiles.empty()) + break; + sum += c.second; + bincount += c.second; + + acc(c.first/1000.0, ba::weight=c.second); + for(unsigned int i=0; i < c.second; ++i) + cumulstats(c.first/1000.0, ba::weight=1); // "weighted" does not work for median + if(sum > percentiles.front() * totcumul / 100.0) { + ret.push_back({100.0-percentiles.front(), (double)c.first/1000.0, ba::mean(acc), ba::median(acc), sqrt(ba::variance(acc)), bincount, ba::mean(cumulstats), ba::median(cumulstats)}); + + percentiles.pop_front(); + acc=decltype(acc)(); + bincount=0; + } + } + std::sort(ret.begin(), ret.end(), [](const LogHistogramBin& a, const LogHistogramBin& b) { + return a.percentile < b.percentile; + }); + return ret; +} + +template +void writeLogHistogramFile(const T& bins, std::ostream& out, std::deque percentiles={0.001, 0.01, 0.1, 0.2, 0.5, 1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 94, 95, 96, 97, 97.5, 98, 98.5, 99, 99.5, 99.6, 99.9, 99.99, 99.999, 99.9999} ) +{ + + auto vec = createLogHistogram(bins, percentiles); + out<< R"(# set logscale xy +# set mxtics 10 +# set mytics 10 +# set grid xtics +# set grid ytics +# set xlabel "Slowest percentile" +# set ylabel "Millisecond response time" +# set terminal svg +# set output 'log-histogram.svg' +# plot 'log-histogram' using 1:2 with linespoints title 'Average latency per percentile', \ +# 'log-histogram' using 1:6 with linespoints title 'Cumulative average latency', \ +# 'log-histogram' using 1:7 with linespoints title 'Cumulative median latency')"<<"\n"; + + out<<"# slow-percentile usec-latency-mean usec-latency-max usec-latency-median usec-latency-stddev usec-latency-cumul usec-latency-median-cumul num-queries\n"; + + + for(const auto& e : vec) { + out< +void writeFullHistogramFile(const T& bins, double binMsec, std::ofstream& out) +{ + std::map reducedBins; + for(const auto& b : bins) { + reducedBins[b.first/(1000.0*binMsec)]+=b.second; + } + out<<"# msec-bin-low count\n"; + for(const auto& rb : reducedBins) { + out<