]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
feat(dnsdist): Add more TCP related traces
authorPieter Lexis <pieter.lexis@powerdns.com>
Fri, 28 Nov 2025 14:03:32 +0000 (15:03 +0100)
committerPieter Lexis <pieter.lexis@powerdns.com>
Fri, 12 Dec 2025 15:30:51 +0000 (16:30 +0100)
pdns/dnsdistdist/dnsdist-backend.cc
pdns/dnsdistdist/dnsdist-tcp-downstream.cc
pdns/dnsdistdist/dnsdist-tcp.cc
regression-tests.dnsdist/test_OpenTelemetryTracing.py

index 6b2397fd3c195da89090a1dd03b01b2825bc0c34..112655401c6f3ce6648c2ffbb4602397e78118e7 100644 (file)
@@ -42,6 +42,7 @@
 
 bool DownstreamState::passCrossProtocolQuery(std::unique_ptr<CrossProtocolQuery>&& cpq)
 {
+  auto closer = cpq->query.d_idstate.getCloser(__func__); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
 #if defined(HAVE_DNS_OVER_HTTPS) && defined(HAVE_NGHTTP2)
   if (!d_config.d_dohPath.empty()) {
     return g_dohClientThreads && g_dohClientThreads->passCrossProtocolQueryToThread(std::move(cpq));
index f64d8573cd0d442dee24aa1bebb914d289f26ea0..8d83181e9fb7118bb65be5b1c62cf4a310868f31 100644 (file)
@@ -232,6 +232,7 @@ enum class ConnectionState : uint8_t {
 
 static void prepareQueryForSending(TCPQuery& query, uint16_t queryID, ConnectionState connectionState)
 {
+  auto closer = query.d_idstate.getCloser(__func__); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
   if (connectionState == ConnectionState::needProxy) {
     if (query.d_proxyProtocolPayload.size() > 0 && !query.d_proxyProtocolPayloadAdded) {
       query.d_buffer.insert(query.d_buffer.begin(), query.d_proxyProtocolPayload.begin(), query.d_proxyProtocolPayload.end());
@@ -257,9 +258,11 @@ static void prepareQueryForSending(TCPQuery& query, uint16_t queryID, Connection
   editPayloadID(query.d_buffer, queryID, query.d_proxyProtocolPayloadAdded ? query.d_idstate.d_proxyProtocolPayloadSize : 0, true);
 }
 
+static const string classnamePrefix = "TCPConnectionToBackend::";
 IOState TCPConnectionToBackend::queueNextQuery(std::shared_ptr<TCPConnectionToBackend>& conn)
 {
   conn->d_currentQuery = std::move(conn->d_pendingQueries.front());
+  auto closer = conn->d_currentQuery.d_query.d_idstate.getCloser(classnamePrefix + __func__);
 
   uint16_t id = conn->d_highestStreamID;
   prepareQueryForSending(conn->d_currentQuery.d_query, id, conn->needProxyProtocolPayload() ? ConnectionState::needProxy : ConnectionState::proxySent);
@@ -273,6 +276,7 @@ IOState TCPConnectionToBackend::queueNextQuery(std::shared_ptr<TCPConnectionToBa
 
 IOState TCPConnectionToBackend::sendQuery(std::shared_ptr<TCPConnectionToBackend>& conn, const struct timeval& now)
 {
+  auto closer = conn->d_currentQuery.d_query.d_idstate.getCloser(classnamePrefix + __func__);
   (void)now;
   DEBUGLOG("sending query to backend "<<conn->getDS()->getNameWithAddr()<<" over FD "<<conn->d_handler->getDescriptor());
 
@@ -543,6 +547,7 @@ void TCPConnectionToBackend::handleIOCallback(int fd, FDMultiplexer::funcparam_t
 
 void TCPConnectionToBackend::queueQuery(std::shared_ptr<TCPQuerySender>& sender, TCPQuery&& query)
 {
+  auto closer = query.d_idstate.getCloser(classnamePrefix + __func__);
   if (!d_ioState) {
     d_ioState = make_unique<IOStateHandler>(*d_mplexer, d_handler->getDescriptor());
   }
@@ -695,6 +700,7 @@ IOState TCPConnectionToBackend::handleResponse(std::shared_ptr<TCPConnectionToBa
     notifyAllQueriesFailed(now, FailureReason::unexpectedQueryID);
     return IOState::Done;
   }
+  auto closer = it->second.d_query.d_idstate.getCloser(classnamePrefix + __func__);
 
   editPayloadID(d_responseBuffer, ntohs(it->second.d_query.d_idstate.origID), 0, false);
 
index 689e5f7f6094889cd66a8e5ac6b5cca290aee223..a6e5cd6b7754186cbbb4bba9fc9514fc5cd58bc2 100644 (file)
@@ -507,6 +507,7 @@ void IncomingTCPConnectionState::terminateClientConnection()
 
 void IncomingTCPConnectionState::queueResponse(std::shared_ptr<IncomingTCPConnectionState>& state, const struct timeval& now, TCPResponse&& response, bool fromBackend)
 {
+  auto closer = response.d_idstate.getCloser(__func__); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
   // queue response
   state->d_queuedResponses.emplace_back(std::move(response));
   DEBUGLOG("queueing response, state is " << (int)state->d_state << ", queue size is now " << state->d_queuedResponses.size());
@@ -596,6 +597,7 @@ void IncomingTCPConnectionState::updateIO(IOState newState, const struct timeval
 /* called from the backend code when a new response has been received */
 void IncomingTCPConnectionState::handleResponse(const struct timeval& now, TCPResponse&& response)
 {
+  auto closer = response.d_idstate.getCloser(__func__); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
   if (std::this_thread::get_id() != d_creatorThreadID) {
     handleCrossProtocolResponse(now, std::move(response));
     return;
@@ -746,6 +748,7 @@ std::unique_ptr<CrossProtocolQuery> getTCPCrossProtocolQueryFromDQ(DNSQuestion&
 
 void IncomingTCPConnectionState::handleCrossProtocolResponse(const struct timeval& now, TCPResponse&& response)
 {
+  auto closer = response.d_idstate.getCloser(__func__); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
   std::shared_ptr<IncomingTCPConnectionState> state = shared_from_this();
   try {
     auto ptr = std::make_unique<TCPCrossProtocolResponse>(std::move(response), state, now);
@@ -837,11 +840,8 @@ IncomingTCPConnectionState::QueryProcessingResult IncomingTCPConnectionState::ha
     }
   }
 
-  pdns::trace::dnsdist::Tracer::Closer closer;
-  if (auto tracer = ids.getTracer(); tracer != nullptr) {
-    // TODO: figure out if this is a root span
-    closer = tracer->openSpan("IncomingTCPConnectionState::handleQuery", tracer->getLastSpanID());
-  }
+  static const std::string classnamePrefix = "IncomingTCPConnectionState::";
+  auto closer = ids.getCloser(classnamePrefix + __func__); // NOLINT(cppcoreguidelines-pro-bounds-array-to-pointer-decay)
 
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast
   ids.qname = DNSName(reinterpret_cast<const char*>(query.data()), static_cast<int>(query.size()), sizeof(dnsheader), false, &ids.qtype, &ids.qclass);
@@ -969,7 +969,11 @@ IncomingTCPConnectionState::QueryProcessingResult IncomingTCPConnectionState::ha
 
   prependSizeToTCPQuery(query, 0);
 
-  auto downstreamConnection = getDownstreamConnection(backend, dnsQuestion.proxyProtocolValues, now);
+  std::shared_ptr<TCPConnectionToBackend> downstreamConnection;
+  {
+    auto dscCloser = dnsQuestion.ids.getCloser("getDownstreamConnection");
+    downstreamConnection = getDownstreamConnection(backend, dnsQuestion.proxyProtocolValues, now);
+  }
 
   if (backend->d_config.useProxyProtocol) {
     /* if we ever sent a TLV over a connection, we can never go back */
@@ -984,8 +988,12 @@ IncomingTCPConnectionState::QueryProcessingResult IncomingTCPConnectionState::ha
     downstreamConnection->setProxyProtocolValuesSent(std::move(dnsQuestion.proxyProtocolValues));
   }
 
-  TCPQuery tcpquery(std::move(query), std::move(ids));
-  tcpquery.d_proxyProtocolPayload = std::move(proxyProtocolPayload);
+  TCPQuery tcpquery;
+  {
+    auto tcpqueryCloser = dnsQuestion.ids.getCloser("createTCPQuery");
+    tcpquery = {std::move(query), std::move(ids)};
+    tcpquery.d_proxyProtocolPayload = std::move(proxyProtocolPayload);
+  }
 
   vinfolog("Got query for %s|%s from %s (%s, %d bytes), relayed to %s", tcpquery.d_idstate.qname.toLogString(), QType(tcpquery.d_idstate.qtype).toString(), d_proxiedRemote.toStringWithPort(), getProtocol().toString(), tcpquery.d_buffer.size(), backend->getNameWithAddr());
   std::shared_ptr<TCPQuerySender> incoming = state;
index 3f90fa73294b005800f032a6ecb7e8005fb9bfbe..6a5964fa196574856522d1b480fa0a35ff8a5754 100644 (file)
@@ -169,7 +169,12 @@ class DNSDistOpenTelemetryProtobufTest(test_Protobuf.DNSDistProtobufTest):
 
 class DNSDistOpenTelemetryProtobufBaseTest(DNSDistOpenTelemetryProtobufTest):
     def doTest(
-        self, hasProcessResponseAfterRules=False, useTCP=False, traceID="", spanID=""
+        self,
+        hasProcessResponseAfterRules=False,
+        useTCP=False,
+        traceID="",
+        spanID="",
+        extraFunctions=set(),
     ):
         msg = self.sendQueryAndGetProtobuf(useTCP, traceID, spanID)
 
@@ -186,7 +191,9 @@ class DNSDistOpenTelemetryProtobufBaseTest(DNSDistOpenTelemetryProtobufTest):
             traces_data, preserving_proto_field_name=True
         )
 
-        self.checkOTData(ot_data, hasProcessResponseAfterRules, useTCP)
+        self.checkOTData(
+            ot_data, hasProcessResponseAfterRules, useTCP, extraFunctions=extraFunctions
+        )
 
         traceId = base64.b64encode(msg.openTelemetryTraceID).decode()
         for msg_span in ot_data["resource_spans"][0]["scope_spans"][0]["spans"]:
@@ -242,7 +249,18 @@ response_rules:
         self.doTest()
 
     def testTCP(self):
-        self.doTest(useTCP=True)
+        self.doTest(
+            useTCP=True,
+            extraFunctions={
+                "createTCPQuery",
+                "TCPConnectionToBackend::handleResponse",
+                "getDownstreamConnection",
+                "TCPConnectionToBackend::sendQuery",
+                "handleResponse",
+                "prepareQueryForSending",
+                "TCPConnectionToBackend::queueQuery",
+            },
+        )
 
 
 class TestOpenTelemetryTracingBaseLua(DNSDistOpenTelemetryProtobufBaseTest):
@@ -264,7 +282,18 @@ addResponseAction(AllRule(), RemoteLogResponseAction(rl), {name="Do PB logging"}
         self.doTest()
 
     def testTCP(self):
-        self.doTest(useTCP=True)
+        self.doTest(
+            useTCP=True,
+            extraFunctions={
+                "createTCPQuery",
+                "TCPConnectionToBackend::handleResponse",
+                "getDownstreamConnection",
+                "TCPConnectionToBackend::sendQuery",
+                "handleResponse",
+                "prepareQueryForSending",
+                "TCPConnectionToBackend::queueQuery",
+            },
+        )
 
 
 class TestOpenTelemetryTracingBaseDelayYAML(DNSDistOpenTelemetryProtobufBaseTest):
@@ -307,7 +336,20 @@ response_rules:
         self.doTest(True)
 
     def testTCP(self):
-        self.doTest(hasProcessResponseAfterRules=True, useTCP=True)
+        self.doTest(
+            hasProcessResponseAfterRules=True,
+            useTCP=True,
+            extraFunctions={
+                "queueResponse",
+                "handleResponse",
+                "TCPConnectionToBackend::queueQuery",
+                "createTCPQuery",
+                "prepareQueryForSending",
+                "getDownstreamConnection",
+                "TCPConnectionToBackend::handleResponse",
+                "TCPConnectionToBackend::sendQuery",
+            },
+        )
 
 
 class TestOpenTelemetryTracingBaseDelayLua(DNSDistOpenTelemetryProtobufBaseTest):
@@ -329,7 +371,20 @@ addResponseAction(AllRule(), RemoteLogResponseAction(rl, nil, false, {}, {}, tru
         self.doTest(True)
 
     def testTCP(self):
-        self.doTest(hasProcessResponseAfterRules=True, useTCP=True)
+        self.doTest(
+            hasProcessResponseAfterRules=True,
+            useTCP=True,
+            extraFunctions={
+                "queueResponse",
+                "handleResponse",
+                "TCPConnectionToBackend::queueQuery",
+                "createTCPQuery",
+                "prepareQueryForSending",
+                "getDownstreamConnection",
+                "TCPConnectionToBackend::handleResponse",
+                "TCPConnectionToBackend::sendQuery",
+            },
+        )
 
 
 class TestOpenTelemetryTracingUseIncomingYAML(DNSDistOpenTelemetryProtobufBaseTest):
@@ -566,7 +621,7 @@ response_rules:
      type: Drop
 """
 
-    def doTest(self, useTCP=False):
+    def doTest(self, useTCP=False, extraFunctions=set()):
         msg = self.sendQueryAndGetProtobuf(useTCP=useTCP, dropped=True)
         traces_data = opentelemetry.proto.trace.v1.trace_pb2.TracesData()
         traces_data.ParseFromString(msg.openTelemetryData)
@@ -574,21 +629,36 @@ response_rules:
             traces_data, preserving_proto_field_name=True
         )
 
+        funcs = extraFunctions.union(
+            {
+                "ResponseRule: Drop",
+            }
+        )
+
         self.checkOTData(
             ot_data,
             hasProcessResponseAfterRules=False,
             hasRemoteLogResponseAction=False,
             useTCP=useTCP,
-            extraFunctions={
-                "ResponseRule: Drop",
-            },
+            extraFunctions=funcs,
         )
 
     def testBasic(self):
         self.doTest(False)
 
     def testTCP(self):
-        self.doTest(True)
+        self.doTest(
+            True,
+            extraFunctions={
+                "handleResponse",
+                "TCPConnectionToBackend::queueQuery",
+                "getDownstreamConnection",
+                "createTCPQuery",
+                "TCPConnectionToBackend::handleResponse",
+                "TCPConnectionToBackend::sendQuery",
+                "prepareQueryForSending",
+            },
+        )
 
 
 class TestOpenTelemetryTracingBaseLuaIncludedRemoteLoggerDropped(
@@ -649,7 +719,7 @@ query_rules:
        - 192.0.2.1
 """
 
-    def doTest(self, useTCP=False):
+    def doTest(self, useTCP=False, extraFunctions=set()):
         msg = self.sendQueryAndGetProtobuf(
             useTCP=useTCP, querySentByDNSDist=False, dropped=True
         )
@@ -659,6 +729,7 @@ query_rules:
             traces_data, preserving_proto_field_name=True
         )
 
+        funcs = extraFunctions.union({"Rule: Spoof A record"})
         self.checkOTData(
             ot_data,
             hasProcessResponseAfterRules=False,
@@ -666,13 +737,11 @@ query_rules:
             useTCP=useTCP,
             hasSelectBackendForOutgoingQuery=False,
             hasResponse=False,
-            extraFunctions={
-                "Rule: Spoof A record",
-            },
+            extraFunctions=funcs,
         )
 
     def testBasic(self):
         self.doTest()
 
     def testTCP(self):
-        self.doTest(useTCP=True)
+        self.doTest(useTCP=True, extraFunctions={"queueResponse"})