]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
feat(dnsdist): Add ability to use incoming TraceID 16395/head
author Pieter Lexis <pieter.lexis@powerdns.com>
Thu, 30 Oct 2025 10:23:13 +0000 (11:23 +0100)
committer Pieter Lexis <pieter.lexis@powerdns.com>
Tue, 4 Nov 2025 11:02:43 +0000 (12:02 +0100)
pdns/dnsdistdist/dnsdist-actions-definitions.yml
pdns/dnsdistdist/dnsdist-actions-factory.cc
pdns/dnsdistdist/dnsdist-lua-actions.cc
pdns/dnsdistdist/docs/reference/actions.rst
pdns/dnsdistdist/docs/reference/ottrace.rst
regression-tests.dnsdist/test_OpenTelemetryTracing.py

index d7ce17c567ab6a878950d00b5e62686295ed88e7..055dac87effac3fe528e8edfd24e5e2c01519894 100644 (file)
@@ -406,6 +406,14 @@ are processed after this action"
     - name: "value"
       type: "bool"
       description: "Whether or not to enable tracing"
+    - name: "use_incoming_traceid"
+      type: "bool"
+      default: "false"
+      description: "Use the Trace ID and Span ID from the query's EDNS options, when present, instead of generating new ones"
+    - name: "trace_edns_option"
+      type: "u16"
+      default: 65500
+      description: "The EDNS Option code to take the Trace ID from"
 - name: "SNMPTrap"
   description: "Send an SNMP trap, adding the message string as the query description. Subsequent rules are processed after this action"
   parameters:
index 34cc53994dc8ea83b34fbac94128795cef4dc2da..7a3c3e15efec861e21460fc8a032dd6a953e0c89 100644 (file)
@@ -1680,8 +1680,8 @@ private:
 class SetTraceAction : public DNSAction
 {
 public:
-  SetTraceAction(bool value) :
-    d_value{value} {};
+  SetTraceAction(bool value, std::optional<bool> useIncomingTraceID, std::optional<short unsigned int> incomingTraceIDOptionCode) :
+    d_value{value}, d_useIncomingTraceID(useIncomingTraceID), d_incomingTraceIDOptionCode(incomingTraceIDOptionCode) {};
 
   DNSAction::Action operator()([[maybe_unused]] DNSQuestion* dnsquestion, [[maybe_unused]] std::string* ruleresult) const override
   {
@@ -1691,13 +1691,32 @@ public:
       vinfolog("SetTraceAction called, but OpenTelemetry tracing is globally disabled. Did you forget to call setOpenTelemetryTracing?");
       return Action::None;
     }
+    dnsquestion->ids.tracingEnabled = d_value;
     if (d_value) {
       tracer->setRootSpanAttribute("query.qname", AnyValue{dnsquestion->ids.qname.toStringNoDot()});
       tracer->setRootSpanAttribute("query.qtype", AnyValue{QType(dnsquestion->ids.qtype).toString()});
       tracer->setRootSpanAttribute("query.remote.address", AnyValue{dnsquestion->ids.origRemote.toString()});
       tracer->setRootSpanAttribute("query.remote.port", AnyValue{dnsquestion->ids.origRemote.getPort()});
+      if (d_useIncomingTraceID.value_or(false)) {
+        if (dnsquestion->ednsOptions == nullptr && !parseEDNSOptions(*dnsquestion)) {
+          // Maybe parsed, but no EDNS found
+          return Action::None;
+        }
+        if (dnsquestion->ednsOptions == nullptr) {
+          // Parsing failed, log a warning and return
+          vinfolog("parsing EDNS options failed while looking for OpenTelemetry Trace ID");
+          return Action::None;
+        }
+        pdns::trace::TraceID traceID;
+        pdns::trace::SpanID spanID;
+        if (pdns::trace::extractOTraceIDs(*(dnsquestion->ednsOptions), EDNSOptionCode::EDNSOptionCodeEnum(d_incomingTraceIDOptionCode.value_or(EDNSOptionCode::OTTRACEIDS)), traceID, spanID)) {
+          tracer->setTraceID(traceID);
+          if (spanID != pdns::trace::s_emptySpanID) {
+            tracer->setRootSpanID(spanID);
+          }
+        }
+      }
     }
-    dnsquestion->ids.tracingEnabled = d_value;
 #endif
     return Action::None;
   }
@@ -1709,6 +1728,8 @@ public:
 
 private:
   bool d_value;
+  std::optional<bool> d_useIncomingTraceID;
+  std::optional<short unsigned int> d_incomingTraceIDOptionCode;
 };
 
 class SNMPTrapAction : public DNSAction
index 92a90b1729a8f1cad31b053e4103f35155c53045..506ad0bf9bbd7d87be213e13b3e0c286e9b0a290 100644 (file)
@@ -71,6 +71,12 @@ static std::vector<T> convertLuaArrayToRegular(const LuaArray<T>& luaArray)
   return out;
 }
 
+template <class T>
+std::optional<T> boostToStandardOptional(const boost::optional<T>& boostOpt)
+{
+  return boostOpt ? *boostOpt : std::optional<T>();
+}
+
 // NOLINTNEXTLINE(readability-function-cognitive-complexity): this function declares Lua bindings, even with a good refactoring it will likely blow up the threshold
 void setupLuaActions(LuaContext& luaCtx)
 {
index 272ef80d24ece9d2a2ebbb43e306198efbe452de..b11121e6d33bfd818cfb8c6e7693970c0ce9f40f 100644 (file)
@@ -780,7 +780,7 @@ The following actions exist.
 
   :param int ttl: Cache TTL for temporary failure replies
 
-.. function:: SetTraceAction(value)
+.. function:: SetTraceAction(value[, use_incoming_traceid[, trace_edns_option]])
 
   .. versionadded:: 2.1.0
 
@@ -790,6 +790,8 @@ The following actions exist.
   Tracing has to be turned on globally as well using :func:`setOpenTelemetryTracing`.
 
   :param bool value: Whether to enable or disable query tracing.
+  :param bool use_incoming_traceid: If the incoming query has a TraceID in its EDNS options, use that instead of generating one, default false.
+  :param int trace_edns_option: The EDNS option code that contains the TraceID, default 65500.
 
 .. function:: SkipCacheAction()
 
index 0402d8da4b06e020a27c2cea971f5afa1ceb9d61..61a00f0e562747ac192e37038f6b01adf9c1446c 100644 (file)
@@ -99,3 +99,23 @@ This value is retrieved with the :func:`getSpanID <DNSQuestion:getSpanID>` funct
            end
            return DNSAction.None
          end
+
+Accepting Trace ID and Span ID from upstream servers
+====================================================
+
+:program:`dnsdist` can also use a Trace ID and an optional Span ID taken from an EDNS option of an incoming query.
+It will not do this by default, but this can be enabled with the ``use_incoming_traceid`` argument of the ``SetTrace`` action.
+When set to ``true``, the Trace ID and Span ID found in the incoming query will be used.
+Should there be no ID in the incoming query, a random ID will be generated.
+
+.. code-block:: yaml
+
+   query_rules:
+     - name: Enable tracing
+       selector:
+         # Just as an example, in production don't trace all the queries
+         type: All
+       action:
+         type: SetTrace
+         value: true
+         use_incoming_traceid: true
index fc7a34bfa2a0e5658e676dce92a08dc186b0c0a5..49d991f6a07c5e574430cee8d31169681c312f85 100644 (file)
@@ -1,7 +1,12 @@
 #!/usr/bin/env python
 
 import base64
-import dns
+import binascii
+import dns.message
+import dns.rrset
+import dns.rdataclass
+import dns.rdatatype
+import dns.edns
 import time
 
 import opentelemetry.proto.trace.v1.trace_pb2
@@ -15,11 +20,23 @@ class DNSDistOpenTelemetryProtobufTest(test_Protobuf.DNSDistProtobufTest):
         self.assertTrue(msg)
         self.assertTrue(msg.HasField("openTelemetry"))
 
-    def sendQueryAndGetProtobuf(self, useTCP=False):
+    def sendQueryAndGetProtobuf(
+        self, useTCP=False, traceID="", spanID="", ednsTraceIDOpt=65500
+    ):
         name = "query.ot.tests.powerdns.com."
 
         target = "target.ot.tests.powerdns.com."
         query = dns.message.make_query(name, "A", "IN")
+
+        if traceID != "":
+            ottrace = dns.edns.GenericOption(str(ednsTraceIDOpt), "\x00\x00")
+            ottrace.data += binascii.a2b_hex(traceID)
+            if spanID != "":
+                ottrace.data += binascii.a2b_hex(spanID)
+            query = dns.message.make_query(
+                name, "A", "IN", use_edns=True, options=[ottrace]
+            )
+
         response = dns.message.make_response(query)
 
         rrset = dns.rrset.from_text(
@@ -52,12 +69,15 @@ class DNSDistOpenTelemetryProtobufTest(test_Protobuf.DNSDistProtobufTest):
 
 
 class DNSDistOpenTelemetryProtobufBaseTest(DNSDistOpenTelemetryProtobufTest):
-    def doTest(self, wasDelayed=False, useTCP=False):
-        msg = self.sendQueryAndGetProtobuf(useTCP)
+    def doTest(self, wasDelayed=False, useTCP=False, traceID="", spanID=""):
+        msg = self.sendQueryAndGetProtobuf(useTCP, traceID, spanID)
 
         self.assertTrue(msg.HasField("openTelemetryTraceID"))
         self.assertTrue(msg.openTelemetryTraceID != "")
 
+        if traceID != "":
+            self.assertEqual(msg.openTelemetryTraceID, binascii.a2b_hex(traceID))
+
         self.assertTrue(msg.HasField("openTelemetryData"))
         traces_data = opentelemetry.proto.trace.v1.trace_pb2.TracesData()
         traces_data.ParseFromString(msg.openTelemetryData)
@@ -143,6 +163,12 @@ class DNSDistOpenTelemetryProtobufBaseTest(DNSDistOpenTelemetryProtobufTest):
                 f"span {msg_span} does not have the trace id {traceId} of the protobuf message",
             )
 
+        if spanID != "":
+            for span in ot_data["resource_spans"][0]["scope_spans"][0]["spans"]:
+                if span["parent_span_id"] == binascii.a2b_hex("0000000000000000"):
+                    self.assertEqual(binascii.a2b_hex(spanID), span["span_id"])
+                    break
+
 
 class TestOpenTelemetryTracingBaseYAML(DNSDistOpenTelemetryProtobufBaseTest):
     _yaml_config_params = [
@@ -273,6 +299,82 @@ addResponseAction(AllRule(), RemoteLogResponseAction(rl, nil, false, {}, {}, tru
         self.doTest(wasDelayed=True, useTCP=True)
 
 
+class TestOpenTelemetryTracingUseIncomingYAML(DNSDistOpenTelemetryProtobufBaseTest):
+    _yaml_config_params = [
+        "_testServerPort",
+        "_protobufServerPort",
+    ]
+    _yaml_config_template = """---
+logging:
+  open_telemetry_tracing: true
+
+backends:
+  - address: 127.0.0.1:%d
+    protocol: Do53
+
+remote_logging:
+ protobuf_loggers:
+   - name: pblog
+     address: 127.0.0.1:%d
+
+query_rules:
+ - name: Enable tracing
+   selector:
+     type: All
+   action:
+     type: SetTrace
+     value: true
+     use_incoming_traceid: true
+
+response_rules:
+ - name: Do PB logging
+   selector:
+     type: All
+   action:
+     type: RemoteLog
+     logger_name: pblog
+"""
+
+    def testNoTraceID(self):
+        self.doTest()
+
+    def testOnlyTraceID(self):
+        self.doTest(traceID="0123456789ABCDEF0123456789ABCDEF")
+
+    def testTraceIDAndSpanID(self):
+        self.doTest(
+            traceID="0123456789ABCDEF0123456789ABCDEF",
+            spanID="FEDCBA9876543210",
+        )
+
+
+class TestOpenTelemetryTracingUseIncomingLua(DNSDistOpenTelemetryProtobufBaseTest):
+    _config_params = [
+        "_testServerPort",
+        "_protobufServerPort",
+    ]
+    _config_template = """
+newServer{address="127.0.0.1:%d"}
+rl = newRemoteLogger('127.0.0.1:%d')
+setOpenTelemetryTracing(true)
+
+addAction(AllRule(), SetTraceAction(true, true), {name="Enable tracing"})
+addResponseAction(AllRule(), RemoteLogResponseAction(rl, nil, false, {}, {}, false), {name="Do PB logging"})
+"""
+
+    def testNoTraceID(self):
+        self.doTest()
+
+    def testOnlyTraceID(self):
+        self.doTest(traceID="0123456789ABCDEF0123456789ABCDEF")
+
+    def testTraceIDAndSpanID(self):
+        self.doTest(
+            traceID="0123456789ABCDEF0123456789ABCDEF",
+            spanID="FEDCBA9876543210",
+        )
+
+
 class DNSDistOpenTelemetryProtobufNoOTDataTest(DNSDistOpenTelemetryProtobufTest):
     def doTest(self):
         msg = self.sendQueryAndGetProtobuf()