From: phonedph1 Date: Fri, 24 Aug 2018 14:23:14 +0000 (+0000) Subject: Keep track of what destinations are causing the most timeouts X-Git-Tag: dnsdist-1.3.3~129^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=621ccf89fe0d09d2d499b41e5a31e15a9fd08026;p=thirdparty%2Fpdns.git Keep track of what destinations are causing the most timeouts --- diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc index c7e1bb3db8..63f5c40c93 100644 --- a/pdns/pdns_recursor.cc +++ b/pdns/pdns_recursor.cc @@ -116,7 +116,7 @@ thread_local std::unique_ptr MT; // the big MTasker thread_local std::unique_ptr t_RC; thread_local std::unique_ptr t_packetCache; thread_local FDMultiplexer* t_fdm{nullptr}; -thread_local std::unique_ptr t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes; +thread_local std::unique_ptr t_remotes, t_servfailremotes, t_largeanswerremotes, t_bogusremotes, t_timeouts; thread_local std::unique_ptr > > t_queryring, t_servfailqueryring, t_bogusqueryring; thread_local std::shared_ptr t_allowFrom; #ifdef HAVE_PROTOBUF @@ -3755,6 +3755,8 @@ try t_bogusremotes->set_capacity(ringsize); t_largeanswerremotes = std::unique_ptr(new addrringbuf_t()); t_largeanswerremotes->set_capacity(ringsize); + t_timeouts = std::unique_ptr(new addrringbuf_t()); + t_timeouts->set_capacity(ringsize); t_queryring = std::unique_ptr > >(new boost::circular_buffer >()); t_queryring->set_capacity(ringsize); diff --git a/pdns/rec_channel.hh b/pdns/rec_channel.hh index 526faa6a3f..1b8e4d8f30 100644 --- a/pdns/rec_channel.hh +++ b/pdns/rec_channel.hh @@ -73,6 +73,7 @@ std::vector* pleaseGetRemotes(); std::vector* pleaseGetServfailRemotes(); std::vector* pleaseGetBogusRemotes(); std::vector* pleaseGetLargeAnswerRemotes(); +std::vector* pleaseGetTimeouts(); DNSName getRegisteredName(const DNSName& dom); std::atomic* getDynMetric(const std::string& str); optional getStatByName(const std::string& name); diff --git a/pdns/rec_channel_rec.cc b/pdns/rec_channel_rec.cc index 
ad3cfdfdf0..ef69e736c7 100644
--- a/pdns/rec_channel_rec.cc
+++ b/pdns/rec_channel_rec.cc
@@ -1100,6 +1100,22 @@ vector* pleaseGetLargeAnswerRemotes()
   return ret;
 }
 
+// Returns a newly allocated vector holding a copy of every address currently
+// stored in t_timeouts, or an empty vector when t_timeouts has not been set up.
+// NOTE(review): the result is a raw owning pointer; presumably the caller
+// (doGenericTopRemotes / broadcastAccFunction) is responsible for deleting it - confirm.
+vector<ComboAddress>* pleaseGetTimeouts()
+{
+  vector<ComboAddress>* ret = new vector<ComboAddress>();
+  if(!t_timeouts)
+    return ret;
+  ret->reserve(t_timeouts->size());
+  for(const ComboAddress& ca : *t_timeouts) {
+    ret->push_back(ca);
+  }
+  return ret;
+}
+
 string doGenericTopRemotes(pleaseremotefunc_t func)
 {
   typedef map counts_t;
@@ -1275,6 +1291,7 @@ string RecursorControlParser::getAnswer(const string& question, RecursorControlP
 "top-queries show top queries\n"
 "top-pub-queries show top queries grouped by public suffix list\n"
 "top-remotes show top remotes\n"
+"top-timeouts show top downstream timeouts\n"
 "top-servfail-queries show top queries receiving servfail answers\n"
 "top-bogus-queries show top queries validating as bogus\n"
 "top-pub-servfail-queries show top queries receiving servfail answers grouped by public suffix list\n"
@@ -1412,6 +1429,9 @@ string RecursorControlParser::getAnswer(const string& question, RecursorControlP
   if(cmd=="top-largeanswer-remotes")
     return doGenericTopRemotes(pleaseGetLargeAnswerRemotes);
 
+  if(cmd=="top-timeouts")
+    return doGenericTopRemotes(pleaseGetTimeouts);
+
   if(cmd=="current-queries")
     return doCurrentQueries();
 
diff --git a/pdns/recursordist/docs/manpages/rec_control.1.rst b/pdns/recursordist/docs/manpages/rec_control.1.rst
index cc4b1cd7ae..8739a9b1ca 100644
--- a/pdns/recursordist/docs/manpages/rec_control.1.rst
+++ b/pdns/recursordist/docs/manpages/rec_control.1.rst
@@ -237,6 +237,10 @@ top-bogus-remotes
     Shows the top-20 most active remote hosts causing bogus responses.
     Statistics are over the last 'stats-ringbuffer-entries' queries.
 
+top-timeouts
+    Shows the top-20 most active downstream timeout destinations.
+    Statistics are over the last 'stats-ringbuffer-entries' queries.
+
 trace-regex *REGEX*
     Emit resolution trace for matching queries. Empty regex to disable trace.
 
diff --git a/pdns/recursordist/html/index.html b/pdns/recursordist/html/index.html index 9ae0f622bd..c913094e46 100644 --- a/pdns/recursordist/html/index.html +++ b/pdns/recursordist/html/index.html @@ -66,6 +66,7 @@
+
@@ -178,5 +179,20 @@ + + diff --git a/pdns/recursordist/html/local.js b/pdns/recursordist/html/local.js index c11b0b23ef..9e6fc03190 100644 --- a/pdns/recursordist/html/local.js +++ b/pdns/recursordist/html/local.js @@ -137,6 +137,11 @@ $(document).ready(function () { var rows = makeRingRows(data); render('bogusremotering', {rows: rows}); }); + $.getJSON('jsonstat', jsonstatParams('get-remote-ring', 'timeouts', false), + function (data) { + var rows = makeRingRows(data); + render('timeouts', {rows: rows}); + }); } var connectionOK = function (ok, o) { diff --git a/pdns/syncres.cc b/pdns/syncres.cc index fd2d0dfab2..dbe84a1b90 100644 --- a/pdns/syncres.cc +++ b/pdns/syncres.cc @@ -2484,6 +2484,8 @@ bool SyncRes::doResolveAtThisIP(const std::string& prefix, const DNSName& qname, else { // timeout t_sstorage.throttle.throttle(d_now.tv_sec, boost::make_tuple(remoteIP, qname, qtype.getCode()), 10, 5); + if(t_timeouts) + t_timeouts->push_back(remoteIP); } } diff --git a/pdns/syncres.hh b/pdns/syncres.hh index 3fc515d79c..51f3bede6a 100644 --- a/pdns/syncres.hh +++ b/pdns/syncres.hh @@ -963,7 +963,7 @@ public: }; typedef boost::circular_buffer addrringbuf_t; -extern thread_local std::unique_ptr t_servfailremotes, t_largeanswerremotes, t_remotes, t_bogusremotes; +extern thread_local std::unique_ptr t_servfailremotes, t_largeanswerremotes, t_remotes, t_bogusremotes, t_timeouts; extern thread_local std::unique_ptr > > t_queryring, t_servfailqueryring, t_bogusqueryring; extern thread_local std::shared_ptr t_allowFrom; diff --git a/pdns/ws-recursor.cc b/pdns/ws-recursor.cc index e3a5e80243..1b3a8c9908 100644 --- a/pdns/ws-recursor.cc +++ b/pdns/ws-recursor.cc @@ -540,6 +540,8 @@ void RecursorWebServer::jsonstat(HttpRequest* req, HttpResponse *resp) queries=broadcastAccFunction >(pleaseGetBogusRemotes); else if(req->getvars["name"]=="large-answer-remotes") queries=broadcastAccFunction >(pleaseGetLargeAnswerRemotes); + else if(req->getvars["name"]=="timeouts") + 
queries=broadcastAccFunction >(pleaseGetTimeouts); typedef map counts_t; counts_t counts;