From: Remi Gacogne
Date: Fri, 17 Jun 2022 15:03:53 +0000 (+0200)
Subject: dnsdist: Compute backend latency earlier, to avoid internal latency
X-Git-Tag: auth-4.8.0-alpha0~46^2~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cd663777d2624f05b31d57c1594d20b0994ba2a4;p=thirdparty%2Fpdns.git

dnsdist: Compute backend latency earlier, to avoid internal latency

Also properly report the latency as TCP when forwarding a query received
over UDP to TCP-only, DoT and DoH backends.
Exports the TCP latency in the prometheus and API metrics.
---

diff --git a/pdns/dnsdist-tcp.cc b/pdns/dnsdist-tcp.cc
index a5b02e19a6..11de6df453 100644
--- a/pdns/dnsdist-tcp.cc
+++ b/pdns/dnsdist-tcp.cc
@@ -238,11 +238,6 @@ static IOState sendQueuedResponses(std::shared_ptr<IncomingTCPConnectionState>&
   return IOState::Done;
 }
 
-static void updateTCPLatency(const std::shared_ptr<DownstreamState>& ds, double udiff)
-{
-  ds->latencyUsecTCP = (127.0 * ds->latencyUsecTCP / 128.0) + udiff/128.0;
-}
-
 static void handleResponseSent(std::shared_ptr<IncomingTCPConnectionState>& state, const TCPResponse& currentResponse)
 {
   if (currentResponse.d_idstate.qtype == QType::AXFR || currentResponse.d_idstate.qtype == QType::IXFR) {
@@ -262,8 +257,6 @@ static void handleResponseSent(std::shared_ptr<IncomingTCPConnectionState>& stat
       backendProtocol = dnsdist::Protocol::DoTCP;
     }
     ::handleResponseSent(ids, udiff, state->d_ci.remote, ds->d_config.remote, static_cast<unsigned int>(currentResponse.d_buffer.size()), currentResponse.d_cleartextDH, backendProtocol);
-
-    updateTCPLatency(ds, udiff);
   }
 }
 
diff --git a/pdns/dnsdist-web.cc b/pdns/dnsdist-web.cc
index c0f2ece9d8..4eb6492795 100644
--- a/pdns/dnsdist-web.cc
+++ b/pdns/dnsdist-web.cc
@@ -548,6 +548,8 @@ static void handlePrometheus(const YaHTTP::Request& req, YaHTTP::Response& resp)
     output << "# TYPE " << statesbase << "tcpavgconnduration " << "gauge" << "\n";
     output << "# HELP " << statesbase << "tlsresumptions " << "The number of times a TLS session has been resumed" << "\n";
     output << "# TYPE " << statesbase << "tlsersumptions " << "counter" << "\n";
+    output << "# HELP " << statesbase << "tcplatency " << "Server's latency when answering TCP questions in milliseconds" << "\n";
+    output << "# TYPE " << statesbase << "tcplatency " << "gauge" << "\n";
 
     for (const auto& state : *states) {
       string serverName;
@@ -568,8 +570,10 @@ static void handlePrometheus(const YaHTTP::Request& req, YaHTTP::Response& resp)
       output << statesbase << "queries" << label << " " << state->queries.load() << "\n";
       output << statesbase << "responses" << label << " " << state->responses.load() << "\n";
       output << statesbase << "drops" << label << " " << state->reuseds.load() << "\n";
-      if (state->isUp())
-        output << statesbase << "latency" << label << " " << state->latencyUsec/1000.0 << "\n";
+      if (state->isUp()) {
+        output << statesbase << "latency" << label << " " << state->latencyUsec/1000.0 << "\n";
+        output << statesbase << "tcplatency" << label << " " << state->latencyUsecTCP/1000.0 << "\n";
+      }
       output << statesbase << "senderrors" << label << " " << state->sendErrors.load() << "\n";
       output << statesbase << "outstanding" << label << " " << state->outstanding.load() << "\n";
       output << statesbase << "order" << label << " " << state->d_config.order << "\n";
@@ -996,12 +1000,14 @@ static void addServerToJSON(Json::array& servers, int id, const std::shared_ptr<
     {"tcpAvgQueriesPerConnection", (double)a->tcpAvgQueriesPerConnection},
     {"tcpAvgConnectionDuration", (double)a->tcpAvgConnectionDuration},
     {"tlsResumptions", (double)a->tlsResumptions},
+    {"tcpLatency", (double)(a->latencyUsecTCP/1000.0)},
     {"dropRate", (double)a->dropRate}
   };
 
   /* sending a latency for a DOWN server doesn't make sense */
   if (a->d_config.availability == DownstreamState::Availability::Down) {
     server["latency"] = nullptr;
+    server["tcpLatency"] = nullptr;
   }
 
   servers.push_back(std::move(server));
diff --git a/pdns/dnsdist.cc b/pdns/dnsdist.cc
index 6e11a4c8ce..e67d7a7a6b 100644
--- a/pdns/dnsdist.cc
+++ b/pdns/dnsdist.cc
@@ -654,6 +654,11 @@ void responderThread(std::shared_ptr<DownstreamState> dss)
       dh->id = ids->origID;
       ++dss->responses;
 
+      double udiff = ids->sentTime.udiff();
+      // do that _before_ the processing, otherwise it's not fair to the backend
+      cerr<<"udiff is "<<(udiff/1000.0)<<endl;
+      dss->latencyUsec = (127.0 * dss->latencyUsec / 128.0) + udiff / 128.0;
+
       /* don't call processResponse for DOH */
       if (du) {
 #ifdef HAVE_DNS_OVER_HTTPS
@@ -686,14 +691,12 @@ void responderThread(std::shared_ptr<DownstreamState> dss)
         sendUDPResponse(origFD, response, dr.delayMsec, ids->hopLocal, ids->hopRemote);
       }
 
-      double udiff = ids->sentTime.udiff();
+      udiff = ids->sentTime.udiff();
       vinfolog("Got answer from %s, relayed to %s, took %f usec", dss->d_config.remote.toStringWithPort(), ids->origRemote.toStringWithPort(), udiff);
 
       handleResponseSent(*ids, udiff, *dr.remote, dss->d_config.remote, static_cast<unsigned int>(got), cleartextDH, dss->getProtocol());
 
       dss->releaseState(queryId);
-      dss->latencyUsec = (127.0 * dss->latencyUsec / 128.0) + udiff/128.0;
-
       doLatencyStats(udiff);
     }
   }
@@ -1384,8 +1387,6 @@ public:
 
     handleResponseSent(ids, udiff, *dr.remote, d_ds->d_config.remote, response.d_buffer.size(), cleartextDH, d_ds->getProtocol());
 
-    d_ds->latencyUsec = (127.0 * d_ds->latencyUsec / 128.0) + udiff/128.0;
-
     doLatencyStats(udiff);
   }
 
diff --git a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc
index 1dbf7cc96c..64f731eb23 100644
--- a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc
+++ b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc
@@ -659,6 +659,9 @@ IOState TCPConnectionToBackend::handleResponse(std::shared_ptr<TCPConnectionToBa
   --conn->d_ds->outstanding;
   auto ids = std::move(it->second.d_query.d_idstate);
 
+  const double udiff = ids.sentTime.udiff();
+  conn->d_ds->latencyUsecTCP = (127.0 * conn->d_ds->latencyUsecTCP / 128.0) + udiff / 128.0;
+
   d_pendingResponses.erase(it);
   /* marking as idle for now, so we can accept new queries if our queues are empty */
   if (d_pendingQueries.empty() && d_pendingResponses.empty()) {
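
Note on the latency accounting touched above: latencyUsec and latencyUsecTCP are exponentially weighted moving averages with a 127/128 decay, so each backend response moves the stored value by 1/128 of the new sample, and the web/Prometheus code divides by 1000.0 to report milliseconds. A minimal standalone sketch of that formula, for illustration only (the MovingAverage type and the sample values below are invented for this example, not part of the patch):

    #include <iostream>

    // Same 127/128 exponential moving average as latencyUsec/latencyUsecTCP above.
    // Illustrative helper, not dnsdist code.
    struct MovingAverage
    {
      double valueUsec{0.0};

      void add(double sampleUsec)
      {
        // keep 127/128 of the previous average, blend in 1/128 of the new sample
        valueUsec = (127.0 * valueUsec / 128.0) + sampleUsec / 128.0;
      }
    };

    int main()
    {
      MovingAverage tcpLatency;
      for (int i = 0; i < 256; ++i) {
        tcpLatency.add(1500.0); // pretend every TCP response took 1500 usec
      }
      // after many identical samples the average converges towards the sample value
      std::cout << "smoothed TCP latency (ms): " << tcpLatency.valueUsec / 1000.0 << std::endl;
      return 0;
    }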