From a31dc423a02b39f1db74d279e44f757c5cac8e66 Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Tue, 20 Aug 2024 14:44:57 +0200 Subject: [PATCH] dnsdist: Stop reporting timeouts in `topSlow()`, add `topTimeouts()` Until this commit `topSlow()` returned queries that timed out, which is not very helpful. This was happening because timeouts are internally recorded with a very high response time. With this change, `topSlow()` now ignores queries that timed out, and a new command is added to look into these: `topTimeouts()`. (cherry picked from commit 49243aa47ffc4162a44d2badfdcbad3f6c5c8fd7) --- pdns/dnsdist-console.cc | 3 ++- pdns/dnsdist-lua-inspection.cc | 12 +++++++++--- pdns/dnsdistdist/docs/reference/config.rst | 13 +++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pdns/dnsdist-console.cc b/pdns/dnsdist-console.cc index 3e92b566e9..cb53413f3c 100644 --- a/pdns/dnsdist-console.cc +++ b/pdns/dnsdist-console.cc @@ -831,7 +831,8 @@ const std::vector g_consoleKeywords{ { "topResponseRules", true, "[top][, vars]", "show `top` response rules" }, { "topRules", true, "[top][, vars]", "show `top` rules" }, { "topSelfAnsweredResponseRules", true, "[top][, vars]", "show `top` self-answered response rules" }, - { "topSlow", true, "[top][, limit][, labels]", "show `top` queries slower than `limit` milliseconds, grouped by last `labels` labels" }, + {"topSlow", true, "[top][, limit][, labels]", "show `top` queries slower than `limit` milliseconds (timeouts excepted), grouped by last `labels` labels"}, + {"topTimeouts", true, "[top][, labels]", "show `top` queries that timed out, grouped by last `labels` labels"}, { "TrailingDataRule", true, "", "Matches if the query has trailing data" }, { "truncateTC", true, "bool", "if set (defaults to no starting with dnsdist 1.2.0) truncate TC=1 answers so they are actually empty. Fixes an issue for PowerDNS Authoritative Server 2.9.22. Note: turning this on breaks compatibility with RFC 6891." 
}, { "unregisterDynBPFFilter", true, "DynBPFFilter", "unregister this dynamic BPF filter" }, diff --git a/pdns/dnsdist-lua-inspection.cc b/pdns/dnsdist-lua-inspection.cc index ba7dfdb92d..d347ae204b 100644 --- a/pdns/dnsdist-lua-inspection.cc +++ b/pdns/dnsdist-lua-inspection.cc @@ -378,12 +378,18 @@ void setupLuaInspection(LuaContext& luaCtx) luaCtx.executeCode(R"(function topResponses(top, kind, labels) top = top or 10; kind = kind or 0; for k,v in ipairs(getTopResponses(top, kind, labels)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); - luaCtx.writeFunction("getSlowResponses", [](uint64_t top, uint64_t msec, boost::optional labels) { - return getGenResponses(top, labels, [msec](const Rings::Response& r) { return r.usec > msec*1000; }); + luaCtx.writeFunction("getSlowResponses", [](uint64_t top, uint64_t msec, boost::optional labels, boost::optional timeouts) { + return getGenResponses(top, labels, [msec, timeouts](const Rings::Response& resp) { + if (timeouts && *timeouts) { + return resp.usec == std::numeric_limits::max(); + } + return resp.usec > msec * 1000 && resp.usec != std::numeric_limits::max(); }); + }); + luaCtx.executeCode(R"(function topSlow(top, msec, labels) top = top or 10; msec = msec or 500; for k,v in ipairs(getSlowResponses(top, msec, labels, false)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); - luaCtx.executeCode(R"(function topSlow(top, msec, labels) top = top or 10; msec = msec or 500; for k,v in ipairs(getSlowResponses(top, msec, labels)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); + luaCtx.executeCode(R"(function topTimeouts(top, labels) top = top or 10; for k,v in ipairs(getSlowResponses(top, 0, labels, true)) do show(string.format("%4d %-40s %4d %4.1f%%",k,v[1],v[2],v[3])) end end)"); luaCtx.writeFunction("getTopBandwidth", [](uint64_t top) { setLuaNoSideEffect(); diff --git a/pdns/dnsdistdist/docs/reference/config.rst 
b/pdns/dnsdistdist/docs/reference/config.rst index d62622b3ea..b6d9bcf86e 100644 --- a/pdns/dnsdistdist/docs/reference/config.rst +++ b/pdns/dnsdistdist/docs/reference/config.rst @@ -1498,6 +1498,9 @@ Status, Statistics and More .. function:: topSlow([num[, limit[, labels]]]) + .. versionchanged:: 1.9.7 + queries that timed out are no longer reported by ``topSlow``, see :func:`topTimeouts` instead + Print the ``num`` slowest queries that are slower than ``limit`` milliseconds. Optionally grouped by the rightmost ``labels`` DNS labels. @@ -1505,6 +1508,16 @@ Status, Statistics and More :param int limit: Show queries slower than this amount of milliseconds, defaults to 2000 :param int label: Number of labels to cut down to +.. function:: topTimeouts([num[, labels]]) + + .. versionadded:: 1.9.7 + + Print the ``num`` queries that timed out the most. + Optionally grouped by the rightmost ``labels`` DNS labels. + + :param int num: Number to show, defaults to 10 + :param int labels: Number of labels to cut down to + .. _dynblocksref: Dynamic Blocks -- 2.47.2