From 18e3e086d0fb70224ed07bba8d7ad9fa5b437019 Mon Sep 17 00:00:00 2001 From: Pieter Lexis Date: Thu, 30 Oct 2025 11:23:13 +0100 Subject: [PATCH] feat(dnsdist): Add ability to use incoming TraceID --- .../dnsdist-actions-definitions.yml | 8 ++ pdns/dnsdistdist/dnsdist-actions-factory.cc | 27 ++++- pdns/dnsdistdist/dnsdist-lua-actions.cc | 6 + pdns/dnsdistdist/docs/reference/actions.rst | 4 +- pdns/dnsdistdist/docs/reference/ottrace.rst | 20 ++++ .../test_OpenTelemetryTracing.py | 110 +++++++++++++++++- 6 files changed, 167 insertions(+), 8 deletions(-) diff --git a/pdns/dnsdistdist/dnsdist-actions-definitions.yml b/pdns/dnsdistdist/dnsdist-actions-definitions.yml index d7ce17c567..055dac87ef 100644 --- a/pdns/dnsdistdist/dnsdist-actions-definitions.yml +++ b/pdns/dnsdistdist/dnsdist-actions-definitions.yml @@ -406,6 +406,14 @@ are processed after this action" - name: "value" type: "bool" description: "Whether or not to enable tracing" + - name: "use_incoming_traceid" + type: "bool" + default: "false" + description: "When set in the EDNS section, use the query's Trace ID and Span ID" + - name: "trace_edns_option" + type: "u16" + default: 65500 + description: "The EDNS Option code to take the Trace ID from" - name: "SNMPTrap" description: "Send an SNMP trap, adding the message string as the query description. 
Subsequent rules are processed after this action" parameters: diff --git a/pdns/dnsdistdist/dnsdist-actions-factory.cc b/pdns/dnsdistdist/dnsdist-actions-factory.cc index 34cc53994d..7a3c3e15ef 100644 --- a/pdns/dnsdistdist/dnsdist-actions-factory.cc +++ b/pdns/dnsdistdist/dnsdist-actions-factory.cc @@ -1680,8 +1680,8 @@ private: class SetTraceAction : public DNSAction { public: - SetTraceAction(bool value) : - d_value{value} {}; + SetTraceAction(bool value, std::optional useIncomingTraceID, std::optional incomingTraceIDOptionCode) : + d_value{value}, d_useIncomingTraceID(useIncomingTraceID), d_incomingTraceIDOptionCode(incomingTraceIDOptionCode) {}; DNSAction::Action operator()([[maybe_unused]] DNSQuestion* dnsquestion, [[maybe_unused]] std::string* ruleresult) const override { @@ -1691,13 +1691,32 @@ public: vinfolog("SetTraceAction called, but OpenTelemetry tracing is globally disabled. Did you forget to call setOpenTelemetryTracing?"); return Action::None; } + dnsquestion->ids.tracingEnabled = d_value; if (d_value) { tracer->setRootSpanAttribute("query.qname", AnyValue{dnsquestion->ids.qname.toStringNoDot()}); tracer->setRootSpanAttribute("query.qtype", AnyValue{QType(dnsquestion->ids.qtype).toString()}); tracer->setRootSpanAttribute("query.remote.address", AnyValue{dnsquestion->ids.origRemote.toString()}); tracer->setRootSpanAttribute("query.remote.port", AnyValue{dnsquestion->ids.origRemote.getPort()}); + if (d_useIncomingTraceID.value_or(false)) { + if (dnsquestion->ednsOptions == nullptr && !parseEDNSOptions(*dnsquestion)) { + // Maybe parsed, but no EDNS found + return Action::None; + } + if (dnsquestion->ednsOptions == nullptr) { + // Parsing failed, log a warning and return + vinfolog("parsing EDNS options failed while looking for OpenTelemetry Trace ID"); + return Action::None; + } + pdns::trace::TraceID traceID; + pdns::trace::SpanID spanID; + if (pdns::trace::extractOTraceIDs(*(dnsquestion->ednsOptions), 
EDNSOptionCode::EDNSOptionCodeEnum(d_incomingTraceIDOptionCode.value_or(EDNSOptionCode::OTTRACEIDS)), traceID, spanID)) { + tracer->setTraceID(traceID); + if (spanID != pdns::trace::s_emptySpanID) { + tracer->setRootSpanID(spanID); + } + } + } } - dnsquestion->ids.tracingEnabled = d_value; #endif return Action::None; } @@ -1709,6 +1728,8 @@ public: private: bool d_value; + std::optional d_useIncomingTraceID; + std::optional d_incomingTraceIDOptionCode; }; class SNMPTrapAction : public DNSAction diff --git a/pdns/dnsdistdist/dnsdist-lua-actions.cc b/pdns/dnsdistdist/dnsdist-lua-actions.cc index 92a90b1729..506ad0bf9b 100644 --- a/pdns/dnsdistdist/dnsdist-lua-actions.cc +++ b/pdns/dnsdistdist/dnsdist-lua-actions.cc @@ -71,6 +71,12 @@ static std::vector convertLuaArrayToRegular(const LuaArray& luaArray) return out; } +template +std::optional boostToStandardOptional(const boost::optional& boostOpt) +{ + return boostOpt ? *boostOpt : std::optional(); +} + // NOLINTNEXTLINE(readability-function-cognitive-complexity): this function declares Lua bindings, even with a good refactoring it will likely blow up the threshold void setupLuaActions(LuaContext& luaCtx) { diff --git a/pdns/dnsdistdist/docs/reference/actions.rst b/pdns/dnsdistdist/docs/reference/actions.rst index 272ef80d24..b11121e6d3 100644 --- a/pdns/dnsdistdist/docs/reference/actions.rst +++ b/pdns/dnsdistdist/docs/reference/actions.rst @@ -780,7 +780,7 @@ The following actions exist. :param int ttl: Cache TTL for temporary failure replies -.. function:: SetTraceAction(value) +.. function:: SetTraceAction(value[, use_incoming_traceid[, trace_edns_option]]) .. versionadded:: 2.1.0 @@ -790,6 +790,8 @@ The following actions exist. Tracing has to be turned on globally as well using :func:`setOpenTelemetryTracing`. :param bool value: Whether to enable or disable query tracing. + :param bool use_incoming_traceid: If the incoming query has a TraceID in its EDNS options, use that instead of generating one, default false. 
+ :param int trace_edns_option: The EDNS option number that contains the TraceID, default 65500. .. function:: SkipCacheAction() diff --git a/pdns/dnsdistdist/docs/reference/ottrace.rst b/pdns/dnsdistdist/docs/reference/ottrace.rst index 0402d8da4b..61a00f0e56 100644 --- a/pdns/dnsdistdist/docs/reference/ottrace.rst +++ b/pdns/dnsdistdist/docs/reference/ottrace.rst @@ -99,3 +99,23 @@ This value is retrieved with the :func:`getSpanID ` funct end return DNSAction.None end + +Accepting Trace ID and Span ID from upstream servers +==================================================== + +:program:`dnsdist` can also use a Trace ID and optional Span ID from an incoming query. +It will not do this by default, but this can be configured with the ``use_incoming_traceid`` argument. +When set to ``true``, incoming Trace and Span IDs will be used. +Should there be no ID in the incoming query, a random ID will be generated. + +.. code-block:: yaml + + query_rules: + - name: Enable tracing + selector: + # Just as an example, in production don't trace all the queries + type: All + action: + type: SetTrace + value: true + use_incoming_traceid: true diff --git a/regression-tests.dnsdist/test_OpenTelemetryTracing.py b/regression-tests.dnsdist/test_OpenTelemetryTracing.py index fc7a34bfa2..49d991f6a0 100644 --- a/regression-tests.dnsdist/test_OpenTelemetryTracing.py +++ b/regression-tests.dnsdist/test_OpenTelemetryTracing.py @@ -1,7 +1,12 @@ #!/usr/bin/env python import base64 -import dns +import binascii +import dns.message +import dns.rrset +import dns.rdataclass +import dns.rdatatype +import dns.edns import time import opentelemetry.proto.trace.v1.trace_pb2 @@ -15,11 +20,23 @@ class DNSDistOpenTelemetryProtobufTest(test_Protobuf.DNSDistProtobufTest): self.assertTrue(msg) self.assertTrue(msg.HasField("openTelemetry")) - def sendQueryAndGetProtobuf(self, useTCP=False): + def sendQueryAndGetProtobuf( + self, useTCP=False, traceID="", spanID="", ednsTraceIDOpt=65500 + ): name = 
"query.ot.tests.powerdns.com." target = "target.ot.tests.powerdns.com." query = dns.message.make_query(name, "A", "IN") + + if traceID != "": + ottrace = dns.edns.GenericOption(str(ednsTraceIDOpt), "\x00\x00") + ottrace.data += binascii.a2b_hex(traceID) + if spanID != "": + ottrace.data += binascii.a2b_hex(spanID) + query = dns.message.make_query( + name, "A", "IN", use_edns=True, options=[ottrace] + ) + response = dns.message.make_response(query) rrset = dns.rrset.from_text( @@ -52,12 +69,15 @@ class DNSDistOpenTelemetryProtobufTest(test_Protobuf.DNSDistProtobufTest): class DNSDistOpenTelemetryProtobufBaseTest(DNSDistOpenTelemetryProtobufTest): - def doTest(self, wasDelayed=False, useTCP=False): - msg = self.sendQueryAndGetProtobuf(useTCP) + def doTest(self, wasDelayed=False, useTCP=False, traceID="", spanID=""): + msg = self.sendQueryAndGetProtobuf(useTCP, traceID, spanID) self.assertTrue(msg.HasField("openTelemetryTraceID")) self.assertTrue(msg.openTelemetryTraceID != "") + if traceID != "": + self.assertEqual(msg.openTelemetryTraceID, binascii.a2b_hex(traceID)) + self.assertTrue(msg.HasField("openTelemetryData")) traces_data = opentelemetry.proto.trace.v1.trace_pb2.TracesData() traces_data.ParseFromString(msg.openTelemetryData) @@ -143,6 +163,12 @@ class DNSDistOpenTelemetryProtobufBaseTest(DNSDistOpenTelemetryProtobufTest): f"span {msg_span} does not have the trace id {traceId} of the protobuf message", ) + if spanID != "": + for span in ot_data["resource_spans"][0]["scope_spans"][0]["spans"]: + if span["parent_span_id"] == binascii.a2b_hex("0000000000000000"): + self.assertEqual(binascii.a2b_hex(spanID), span["span_id"]) + break + class TestOpenTelemetryTracingBaseYAML(DNSDistOpenTelemetryProtobufBaseTest): _yaml_config_params = [ @@ -273,6 +299,82 @@ addResponseAction(AllRule(), RemoteLogResponseAction(rl, nil, false, {}, {}, tru self.doTest(wasDelayed=True, useTCP=True) +class TestOpenTelemetryTracingUseIncomingYAML(DNSDistOpenTelemetryProtobufBaseTest): + 
_yaml_config_params = [ + "_testServerPort", + "_protobufServerPort", + ] + _yaml_config_template = """--- +logging: + open_telemetry_tracing: true + +backends: + - address: 127.0.0.1:%d + protocol: Do53 + +remote_logging: + protobuf_loggers: + - name: pblog + address: 127.0.0.1:%d + +query_rules: + - name: Enable tracing + selector: + type: All + action: + type: SetTrace + value: true + use_incoming_traceid: true + +response_rules: + - name: Do PB logging + selector: + type: All + action: + type: RemoteLog + logger_name: pblog +""" + + def testNoTraceID(self): + self.doTest() + + def testOnlyTraceID(self): + self.doTest(traceID="0123456789ABCDEF0123456789ABCDEF") + + def testTraceIDAndSpanID(self): + self.doTest( + traceID="0123456789ABCDEF0123456789ABCDEF", + spanID="FEDCBA9876543210", + ) + + +class TestOpenTelemetryTracingUseIncomingLua(DNSDistOpenTelemetryProtobufBaseTest): + _config_params = [ + "_testServerPort", + "_protobufServerPort", + ] + _config_template = """ +newServer{address="127.0.0.1:%d"} +rl = newRemoteLogger('127.0.0.1:%d') +setOpenTelemetryTracing(true) + +addAction(AllRule(), SetTraceAction(true, true), {name="Enable tracing"}) +addResponseAction(AllRule(), RemoteLogResponseAction(rl, nil, false, {}, {}, false), {name="Do PB logging"}) +""" + + def testNoTraceID(self): + self.doTest() + + def testOnlyTraceID(self): + self.doTest(traceID="0123456789ABCDEF0123456789ABCDEF") + + def testTraceIDAndSpanID(self): + self.doTest( + traceID="0123456789ABCDEF0123456789ABCDEF", + spanID="FEDCBA9876543210", + ) + + class DNSDistOpenTelemetryProtobufNoOTDataTest(DNSDistOpenTelemetryProtobufTest): def doTest(self): msg = self.sendQueryAndGetProtobuf() -- 2.47.3