From: Remi Gacogne Date: Wed, 22 Jun 2022 16:26:34 +0000 (+0200) Subject: dnsdist: Add per-protocol latency averages X-Git-Tag: auth-4.8.0-alpha0~40^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=89b29a4d6303e27214583d51a958d2dc2ba8bcbb;p=thirdparty%2Fpdns.git dnsdist: Add per-protocol latency averages --- diff --git a/pdns/dnsdist-web.cc b/pdns/dnsdist-web.cc index 5779c267d2..f0b7631f97 100644 --- a/pdns/dnsdist-web.cc +++ b/pdns/dnsdist-web.cc @@ -161,6 +161,18 @@ std::map MetricDefinitionStorage::metrics{ { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")}, { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")}, { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")}, + { "latency-tcp-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 100 packets received over TCP")}, + { "latency-tcp-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 1000 packets received over TCP")}, + { "latency-tcp-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 10000 packets received over TCP")}, + { "latency-tcp-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 1000000 packets received over TCP")}, + { "latency-dot-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 100 packets received over DoT")}, + { "latency-dot-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 1000 packets received over DoT")}, + { "latency-dot-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 10000 packets received over DoT")}, + { "latency-dot-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 1000000 packets received over DoT")}, + { "latency-doh-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 100 packets received over DoH")}, + { "latency-doh-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 1000 packets received over DoH")}, + { "latency-doh-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 10000 packets received over DoH")}, + { "latency-doh-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency, in microseconds, of the last 1000000 packets received over DoH")}, { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")}, { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")}, { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")}, diff --git a/pdns/dnsdist.cc b/pdns/dnsdist.cc index e79f31fb02..f86586a6a0 100644 --- a/pdns/dnsdist.cc +++ b/pdns/dnsdist.cc @@ -246,25 +246,58 @@ bool DNSQuestion::setTrailingData(const std::string& tail) return true; } -void doLatencyStats(double udiff) +void doLatencyStats(dnsdist::Protocol protocol, double udiff) { - if(udiff < 1000) ++g_stats.latency0_1; - else if(udiff < 10000) ++g_stats.latency1_10; - else if(udiff < 50000) ++g_stats.latency10_50; - else if(udiff < 100000) ++g_stats.latency50_100; - else if(udiff < 1000000) ++g_stats.latency100_1000; - else ++g_stats.latencySlow; - g_stats.latencySum += udiff / 1000; - ++g_stats.latencyCount; - - auto doAvg = [](double& var, double n, double weight) { + constexpr auto doAvg = [](double& var, double n, double weight) { var = (weight -1) * var/weight + n/weight; }; - doAvg(g_stats.latencyAvg100, udiff, 100); - doAvg(g_stats.latencyAvg1000, udiff, 1000); - doAvg(g_stats.latencyAvg10000, udiff, 10000); - doAvg(g_stats.latencyAvg1000000, udiff, 1000000); + if (protocol == dnsdist::Protocol::DoUDP || protocol == dnsdist::Protocol::DNSCryptUDP) { + if (udiff < 1000) { + ++g_stats.latency0_1; + } + else if (udiff < 10000) { + ++g_stats.latency1_10; + } + else if (udiff < 50000) { + ++g_stats.latency10_50; + } + else if (udiff < 100000) { + ++g_stats.latency50_100; + } + else if (udiff < 1000000) { + ++g_stats.latency100_1000; + } + else { + ++g_stats.latencySlow; + } + + g_stats.latencySum += udiff / 1000; + ++g_stats.latencyCount; + + doAvg(g_stats.latencyAvg100, udiff, 100); + doAvg(g_stats.latencyAvg1000, udiff, 1000); + doAvg(g_stats.latencyAvg10000, udiff, 10000); + doAvg(g_stats.latencyAvg1000000, udiff, 1000000); + } + else if (protocol == dnsdist::Protocol::DoTCP || protocol == dnsdist::Protocol::DNSCryptTCP) { + doAvg(g_stats.latencyTCPAvg100, udiff, 100); + doAvg(g_stats.latencyTCPAvg1000, udiff, 1000); + doAvg(g_stats.latencyTCPAvg10000, udiff, 10000); + doAvg(g_stats.latencyTCPAvg1000000, udiff, 1000000); + } + else if (protocol == dnsdist::Protocol::DoT) { + doAvg(g_stats.latencyDoTAvg100, udiff, 100); + doAvg(g_stats.latencyDoTAvg1000, udiff, 1000); + doAvg(g_stats.latencyDoTAvg10000, udiff, 10000); + doAvg(g_stats.latencyDoTAvg1000000, udiff, 1000000); + } + else if (protocol == dnsdist::Protocol::DoH) { + doAvg(g_stats.latencyDoHAvg100, udiff, 100); + doAvg(g_stats.latencyDoHAvg1000, udiff, 1000); + doAvg(g_stats.latencyDoHAvg10000, udiff, 10000); + doAvg(g_stats.latencyDoHAvg1000000, udiff, 1000000); + } } bool responseContentMatches(const PacketBuffer& response, const DNSName& qname, const uint16_t qtype, const uint16_t qclass, const std::shared_ptr& remote, unsigned int& qnameWireLength) @@ -566,16 +599,16 @@ static bool sendUDPResponse(int origFD, const PacketBuffer& response, const int return true; } -void handleResponseSent(const IDState& ids, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol protocol) +void handleResponseSent(const IDState& ids, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol outgoingProtocol) { - handleResponseSent(ids.qname, ids.qtype, udiff, client, backend, size, cleartextDH, protocol); + handleResponseSent(ids.qname, ids.qtype, udiff, client, backend, size, cleartextDH, outgoingProtocol, ids.protocol); } -void handleResponseSent(const DNSName& qname, const QType& qtype, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol protocol) +void handleResponseSent(const DNSName& qname, const QType& qtype, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol outgoingProtocol, dnsdist::Protocol incomingProtocol) { struct timespec ts; gettime(&ts); - g_rings.insertResponse(ts, client, qname, qtype, static_cast(udiff), size, cleartextDH, backend, protocol); + g_rings.insertResponse(ts, client, qname, qtype, static_cast(udiff), size, cleartextDH, backend, outgoingProtocol); switch (cleartextDH.rcode) { case RCode::NXDomain: @@ -589,6 +622,8 @@ void handleResponseSent(const DNSName& qname, const QType& qtype, double udiff, ++g_stats.frontendNoError; break; } + + doLatencyStats(incomingProtocol, udiff); } // listens on a dedicated socket, lobs answers from downstream servers to original requestors @@ -724,8 +759,6 @@ void responderThread(std::shared_ptr dss) handleResponseSent(*ids, udiff, *dr.remote, dss->d_config.remote, static_cast(got), cleartextDH, dss->getProtocol()); dss->releaseState(queryId); - - doLatencyStats(udiff); } } catch (const std::exception& e){ @@ -1221,7 +1254,7 @@ static bool prepareOutgoingResponse(LocalHolders& holders, ClientState& cs, DNSQ break; } - doLatencyStats(0); // we're not going to measure this + doLatencyStats(cs.getProtocol(), 0); // we're not going to measure this return true; } @@ -1416,8 +1449,6 @@ public: vinfolog("Got answer from %s, relayed to %s (UDP), took %f usec", d_ds->d_config.remote.toStringWithPort(), ids.origRemote.toStringWithPort(), udiff); handleResponseSent(ids, udiff, *dr.remote, d_ds->d_config.remote, response.d_buffer.size(), cleartextDH, d_ds->getProtocol()); - - doLatencyStats(udiff); } void handleXFRResponse(const struct timeval& now, TCPResponse&& response) override @@ -1547,7 +1578,7 @@ static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct /* we use dest, always, because we don't want to use the listening address to send a response since it could be 0.0.0.0 */ sendUDPResponse(cs.udpFD, query, dq.delayMsec, dest, remote); - handleResponseSent(qname, qtype, 0., remote, ComboAddress(), query.size(), *dh, dnsdist::Protocol::DoUDP); + handleResponseSent(qname, qtype, 0., remote, ComboAddress(), query.size(), *dh, dnsdist::Protocol::DoUDP, dnsdist::Protocol::DoUDP); return; } diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index f671ed49b3..9b3397f9d6 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -366,6 +366,9 @@ struct DNSDistStats stat_t tcpCrossProtocolQueryPipeFull{0}; stat_t tcpCrossProtocolResponsePipeFull{0}; double latencyAvg100{0}, latencyAvg1000{0}, latencyAvg10000{0}, latencyAvg1000000{0}; + double latencyTCPAvg100{0}, latencyTCPAvg1000{0}, latencyTCPAvg10000{0}, latencyTCPAvg1000000{0}; + double latencyDoTAvg100{0}, latencyDoTAvg1000{0}, latencyDoTAvg10000{0}, latencyDoTAvg1000000{0}; + double latencyDoHAvg100{0}, latencyDoHAvg1000{0}, latencyDoHAvg10000{0}, latencyDoHAvg1000000{0}; typedef std::function statfunction_t; typedef boost::variant*, double*, statfunction_t> entry_t; @@ -397,6 +400,18 @@ struct DNSDistStats {"latency-avg1000", &latencyAvg1000}, {"latency-avg10000", &latencyAvg10000}, {"latency-avg1000000", &latencyAvg1000000}, + {"latency-tcp-avg100", &latencyTCPAvg100}, + {"latency-tcp-avg1000", &latencyTCPAvg1000}, + {"latency-tcp-avg10000", &latencyTCPAvg10000}, + {"latency-tcp-avg1000000", &latencyTCPAvg1000000}, + {"latency-dot-avg100", &latencyDoTAvg100}, + {"latency-dot-avg1000", &latencyDoTAvg1000}, + {"latency-dot-avg10000", &latencyDoTAvg10000}, + {"latency-dot-avg1000000", &latencyDoTAvg1000000}, + {"latency-doh-avg100", &latencyDoHAvg100}, + {"latency-doh-avg1000", &latencyDoHAvg1000}, + {"latency-doh-avg10000", &latencyDoHAvg10000}, + {"latency-doh-avg1000000", &latencyDoHAvg1000000}, {"uptime", uptimeOfProcess}, {"real-memory-usage", getRealMemoryUsage}, {"special-memory-usage", getSpecialMemoryUsage}, @@ -441,7 +456,7 @@ struct DNSDistStats }; extern struct DNSDistStats g_stats; -void doLatencyStats(double udiff); +void doLatencyStats(dnsdist::Protocol protocol, double udiff); #include "dnsdist-idstate.hh" @@ -643,6 +658,28 @@ struct ClientState return tlsFrontend != nullptr || (dohFrontend != nullptr && dohFrontend->isHTTPS()); } + dnsdist::Protocol getProtocol() const + { + if (dnscryptCtx) { + if (udpFD != -1) { + return dnsdist::Protocol::DNSCryptUDP; + } + return dnsdist::Protocol::DNSCryptTCP; + } + if (isDoH()) { + return dnsdist::Protocol::DoH; + } + else if (hasTLS()) { + return dnsdist::Protocol::DoT; + } + else if (udpFD != -1) { + return dnsdist::Protocol::DoUDP; + } + else { + return dnsdist::Protocol::DoTCP; + } + } + std::string getType() const { std::string result = udpFD != -1 ? "UDP" : "TCP"; @@ -1130,6 +1167,5 @@ DNSResponse makeDNSResponseFromIDState(IDState& ids, PacketBuffer& data); void setIDStateFromDNSQuestion(IDState& ids, DNSQuestion& dq, DNSName&& qname); ssize_t udpClientSendRequestToBackend(const std::shared_ptr& ss, const int sd, const PacketBuffer& request, bool healthCheck = false); -void handleResponseSent(const DNSName& qname, const QType& qtype, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol protocol); -void handleResponseSent(const IDState& ids, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol protocol); - +void handleResponseSent(const DNSName& qname, const QType& qtype, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol outgoingProtocol, dnsdist::Protocol incomingProtocol); +void handleResponseSent(const IDState& ids, double udiff, const ComboAddress& client, const ComboAddress& backend, unsigned int size, const dnsheader& cleartextDH, dnsdist::Protocol outgoingProtocol); diff --git a/pdns/dnsdistdist/doh.cc b/pdns/dnsdistdist/doh.cc index daa8efaa6b..ec0e068f91 100644 --- a/pdns/dnsdistdist/doh.cc +++ b/pdns/dnsdistdist/doh.cc @@ -624,7 +624,7 @@ static void processDOHQuery(DOHUnitUniquePtr&& du) if (du->response.size() >= sizeof(dnsheader) && du->contentType.empty()) { auto dh = reinterpret_cast(du->response.data()); - handleResponseSent(qname, QType(qtype), 0., du->ids.origDest, ComboAddress(), du->response.size(), *dh, dnsdist::Protocol::DoH); + handleResponseSent(qname, QType(qtype), 0., du->ids.origDest, ComboAddress(), du->response.size(), *dh, dnsdist::Protocol::DoH, dnsdist::Protocol::DoH); } sendDoHUnitToTheMainThread(std::move(du), "DoH self-answered response"); return; diff --git a/regression-tests.dnsdist/test_API.py b/regression-tests.dnsdist/test_API.py index 784cdeaa14..68526a7cb1 100644 --- a/regression-tests.dnsdist/test_API.py +++ b/regression-tests.dnsdist/test_API.py @@ -29,7 +29,10 @@ class APITestsBase(DNSDistTest): 'downstream-send-errors', 'trunc-failures', 'no-policy', 'latency0-1', 'latency1-10', 'latency10-50', 'latency50-100', 'latency100-1000', 'latency-slow', 'latency-sum', 'latency-count', 'latency-avg100', 'latency-avg1000', - 'latency-avg10000', 'latency-avg1000000', 'uptime', 'real-memory-usage', 'noncompliant-queries', + 'latency-avg10000', 'latency-avg1000000', 'latency-tcp-avg100', 'latency-tcp-avg1000', + 'latency-tcp-avg10000', 'latency-tcp-avg1000000', 'latency-dot-avg100', 'latency-dot-avg1000', + 'latency-dot-avg10000', 'latency-dot-avg1000000', 'latency-doh-avg100', 'latency-doh-avg1000', + 'latency-doh-avg10000', 'latency-doh-avg1000000', 'uptime', 'real-memory-usage', 'noncompliant-queries', 'noncompliant-responses', 'rdqueries', 'empty-queries', 'cache-hits', 'cache-misses', 'cpu-iowait', 'cpu-steal', 'cpu-sys-msec', 'cpu-user-msec', 'fd-usage', 'dyn-blocked', 'dyn-block-nmg-size', 'rule-servfail', 'rule-truncated', 'security-status',