git.ipfire.org Git - thirdparty/pdns.git/commitdiff
feat(dnsdist): Add global config option for OpenTelemetry tracing
author Pieter Lexis <pieter.lexis@powerdns.com>
Fri, 3 Oct 2025 14:51:29 +0000 (16:51 +0200)
committer Pieter Lexis <pieter.lexis@powerdns.com>
Tue, 14 Oct 2025 18:34:58 +0000 (20:34 +0200)
pdns/dnsdistdist/dnsdist-actions-factory.cc
pdns/dnsdistdist/dnsdist-configuration.hh
pdns/dnsdistdist/dnsdist-idstate.hh
pdns/dnsdistdist/dnsdist-lua-bindings-dnsquestion.cc
pdns/dnsdistdist/dnsdist-lua-configuration-items.cc
pdns/dnsdistdist/dnsdist-protobuf.cc
pdns/dnsdistdist/dnsdist-settings-definitions.yml
pdns/dnsdistdist/dnsdist.cc
pdns/dnsdistdist/docs/reference/ottrace.rst

index d3407449205fe9beeb9e1c363112a21a4c8d3974..de4704a820790ef32fca6497839538640e9b8063 100644 (file)
@@ -42,6 +42,7 @@
 
 #include "dnstap.hh"
 #include "dnswriter.hh"
+#include "dolog.hh"
 #include "ednsoptions.hh"
 #include "fstrm_logger.hh"
 #include "ipcipher.hh"
@@ -1684,14 +1685,19 @@ public:
   {
     (void)ruleresult;
 #ifndef DISABLE_PROTOBUF
+    auto tracer = dnsquestion->ids.getTracer();
+    if (tracer == nullptr) {
+      vinfolog("SetTraceAction called, but OpenTelemetry tracing is globally disabled. Did you forget to call setOpenTelemetryTracing?");
+      return Action::None;
+    }
     if (d_value) {
-      dnsquestion->ids.d_OTTracer->activate();
-      dnsquestion->ids.d_OTTracer->setTraceAttribute("query.qname", AnyValue{dnsquestion->ids.qname.toStringNoDot()});
-      dnsquestion->ids.d_OTTracer->setTraceAttribute("query.qtype", AnyValue{QType(dnsquestion->ids.qtype).toString()});
-      dnsquestion->ids.d_OTTracer->setTraceAttribute("query.remote", AnyValue{dnsquestion->ids.origRemote.toLogString()});
+      tracer->activate();
+      tracer->setTraceAttribute("query.qname", AnyValue{dnsquestion->ids.qname.toStringNoDot()});
+      tracer->setTraceAttribute("query.qtype", AnyValue{QType(dnsquestion->ids.qtype).toString()});
+      tracer->setTraceAttribute("query.remote", AnyValue{dnsquestion->ids.origRemote.toLogString()});
     }
     else {
-      dnsquestion->ids.d_OTTracer->deactivate();
+      tracer->deactivate();
     }
     dnsquestion->ids.tracingEnabled = d_value;
 #endif
index 605d735278fb2b9315fad739a88683b99138285b..a08505258a3b14f12184a635b43bd5d3f143ab4e 100644 (file)
@@ -171,6 +171,7 @@ struct RuntimeConfiguration
   bool d_logConsoleConnections{true};
   bool d_addEDNSToSelfGeneratedResponses{true};
   bool d_applyACLToProxiedClients{false};
+  bool d_openTelemetryTracing{false}; // XXX: It would be nice to #ifndef DISABLE_PROTOBUF, but as this is defined in dnsdist-settings-definitions.yml, we can't
 };
 
 /* Be careful not to hold on this for too long, it can be invalidated
index a1e5cdd18d3301782fed35682ad5ed7e627bf158..0afb8a89bebd1c8ebe05d73f73240fa247c386a1 100644 (file)
@@ -23,6 +23,7 @@
 
 #include <cstdint>
 #include <ctime>
+#include <memory>
 #include <optional>
 #include <unordered_map>
 #include <utility>
@@ -30,6 +31,7 @@
 
 #include "config.h"
 #include "dnscrypt.hh"
+#include "dnsdist-configuration.hh"
 #include "dnsname.hh"
 #include "dnsdist-protocols.hh"
 #include "ednsextendederror.hh"
@@ -115,9 +117,27 @@ struct InternalQueryState
 
   // Whether or not Open Telemetry tracing is enabled for this query
   bool tracingEnabled = false;
-
-  // TODO: Do we want to keep some data *without* creating a tracer for each query?
-  std::shared_ptr<pdns::trace::dnsdist::Tracer> d_OTTracer{pdns::trace::dnsdist::Tracer::getTracer()};
+  /**
+   * @brief Returns the Tracer, but only if OpenTelemetry tracing is globally enabled
+   *
+   * The Tracer is created on first use: when the global setting is enabled and
+   * d_OTTracer is still nullptr, a Tracer is obtained from
+   * pdns::trace::dnsdist::Tracer::getTracer(), stored in d_OTTracer and returned
+   * by this and subsequent calls.
+   *
+   * @return The Tracer held in d_OTTracer, or nullptr when d_openTelemetryTracing is
+   *         false in the current runtime configuration or when built with DISABLE_PROTOBUF
+   */
+  std::shared_ptr<pdns::trace::dnsdist::Tracer> getTracer()
+  {
+#ifdef DISABLE_PROTOBUF
+    return nullptr;
+#else
+    if (dnsdist::configuration::getCurrentRuntimeConfiguration().d_openTelemetryTracing) {
+      if (d_OTTracer != nullptr) {
+        return d_OTTracer;
+      }
+      // OpenTelemetry tracing is enabled, but we don't have a tracer yet
+      d_OTTracer = pdns::trace::dnsdist::Tracer::getTracer();
+      return d_OTTracer;
+    }
+    // Tracing is globally disabled: callers must check for nullptr before use
+    return nullptr;
+#endif
+  }
 
   InternalQueryState()
   {
@@ -193,6 +213,9 @@ struct InternalQueryState
   bool selfGenerated{false};
   bool cacheHit{false};
   bool staleCacheHit{false};
+
+private:
+  std::shared_ptr<pdns::trace::dnsdist::Tracer> d_OTTracer{nullptr};
 };
 
 struct IDState
index c56b0c5256def7dff8efd778f532c1496d339204..3d0754e263f8901490d1b06a98ec36c8b99bb202 100644 (file)
@@ -343,8 +343,8 @@ void setupLuaBindingsDNSQuestion([[maybe_unused]] LuaContext& luaCtx)
 #ifdef DISABLE_PROTOBUF
       return std::nullopt;
 #else
-      if (dnsQuestion.ids.tracingEnabled) {
-        auto traceID = dnsQuestion.ids.d_OTTracer->getTraceID();
+      if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) {
+        auto traceID = tracer->getTraceID();
         return std::string(traceID.begin(), traceID.end());
       }
       return std::nullopt;
@@ -357,8 +357,8 @@ void setupLuaBindingsDNSQuestion([[maybe_unused]] LuaContext& luaCtx)
 #ifdef DISABLE_PROTOBUF
       return std::nullopt;
 #else
-      if (dnsQuestion.ids.tracingEnabled) {
-        auto spanID = dnsQuestion.ids.d_OTTracer->getLastSpanID();
+      if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) {
+        auto spanID = tracer->getLastSpanID();
         return std::string(spanID.begin(), spanID.end());
       }
       return std::nullopt;
index dd6d63c194e72c73b9761637deba7eba6fcf9361..5ee8730f8b018ab7dc85749412f1e26544007b91 100644 (file)
@@ -70,6 +70,7 @@ static const std::map<std::string, BooleanConfigurationItems> s_booleanConfigIte
   {"setQueryCount", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_queryCountConfig.d_enabled = newValue; }}},
   {"setVerbose", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_verbose = newValue; }}},
   {"setVerboseHealthChecks", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_verboseHealthChecks = newValue; }}},
+  {"setOpenTelemetryTracing", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_openTelemetryTracing = newValue; }}},
   {"setServFailWhenNoServer", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_servFailOnNoPolicy = newValue; }}},
   {"setRoundRobinFailOnNoServer", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_roundrobinFailOnNoServer = newValue; }}},
   {"setDropEmptyQueries", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_dropEmptyQueries = newValue; }}},
index 785463a05280df772aba54916e07bbffb5b49ecf..73f4e43f54b129d3f4f11cb6cb8c43c1cc5b026a 100644 (file)
@@ -244,9 +244,9 @@ void DNSDistProtoBufMessage::serialize(std::string& data) const
     }
   }
 
-  if (d_dq.ids.tracingEnabled) {
-    msg.setOpenTelemetryTraceID(d_dq.ids.d_OTTracer->getTraceID());
-    msg.setOpenTelemetryData(d_dq.ids.d_OTTracer->getOTProtobuf());
+  if (auto tracer = d_dq.ids.getTracer(); tracer != nullptr && d_dq.ids.tracingEnabled) {
+    msg.setOpenTelemetryTraceID(tracer->getTraceID());
+    msg.setOpenTelemetryData(tracer->getOTProtobuf());
   }
 }
 
index 5303a034b4dd65af604611d506461428b6e5e751..b0f7a70b3cf2b34acbe9dd4768eb2e68ece92dc1 100644 (file)
@@ -1821,6 +1821,13 @@ logging:
     - name: "structured"
       type: "StructuredLoggingConfiguration"
       default: true
+    - name: "open_telemetry_tracing"
+      type: "bool"
+      default: "false"
+      lua-name: "setOpenTelemetryTracing"
+      internal-field-name: "d_openTelemetryTracing"
+      runtime-configurable: true
+      description: "Set to true to enable OpenTelemetry tracing. When true, the :func:`DNSQuestion:setTrace` method makes the query store tracing information (see :doc:`OpenTelemetry tracing <ottrace>`). When this setting is false, no tracing information is gathered at all."
 
 general:
   description: "General settings"
index ebf621d03bbcc9a3d74862da7c57aa84d0558e54..718e5849047cbe6b70b66d8c37bd34d1a6db4624 100644 (file)
@@ -457,8 +457,8 @@ static bool encryptResponse(PacketBuffer& response, size_t maximumSize, bool tcp
 bool applyRulesToResponse(const std::vector<dnsdist::rules::ResponseRuleAction>& respRuleActions, DNSResponse& dnsResponse)
 {
   pdns::trace::dnsdist::Tracer::Closer closer;
-  if (dnsResponse.ids.tracingEnabled) {
-    closer = dnsResponse.ids.d_OTTracer->openSpan("applyRulesToResponse", dnsResponse.ids.d_OTTracer->getLastSpanID());
+  if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) {
+    closer = tracer->openSpan("applyRulesToResponse", tracer->getLastSpanID());
   }
   if (respRuleActions.empty()) {
     return true;
@@ -521,13 +521,13 @@ bool applyRulesToResponse(const std::vector<dnsdist::rules::ResponseRuleAction>&
 bool processResponseAfterRules(PacketBuffer& response, DNSResponse& dnsResponse, [[maybe_unused]] bool muted)
 {
   pdns::trace::dnsdist::Tracer::Closer closer;
-  if (dnsResponse.ids.tracingEnabled) {
-    closer = dnsResponse.ids.d_OTTracer->openSpan("processResponseAfterRules");
+  if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) {
+    closer = tracer->openSpan("processResponseAfterRules");
   }
   bool zeroScope = false;
   if (!fixUpResponse(response, dnsResponse.ids.qname, dnsResponse.ids.origFlags, dnsResponse.ids.ednsAdded, dnsResponse.ids.ecsAdded, dnsResponse.ids.useZeroScope ? &zeroScope : nullptr)) {
-    if (dnsResponse.ids.tracingEnabled) {
-      dnsResponse.ids.d_OTTracer->setSpanAttribute(closer.getSpanID(), "result", AnyValue{"fixUpResponse->false"});
+    if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) {
+      tracer->setSpanAttribute(closer.getSpanID(), "result", AnyValue{"fixUpResponse->false"});
     }
     return false;
   }
@@ -556,8 +556,8 @@ bool processResponseAfterRules(PacketBuffer& response, DNSResponse& dnsResponse,
     }
     {
       pdns::trace::dnsdist::Tracer::Closer cacheInsertCloser;
-      if (dnsResponse.ids.tracingEnabled) {
-        cacheInsertCloser = dnsResponse.ids.d_OTTracer->openSpan("packetCacheInsert", closer.getSpanID());
+      if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) {
+        cacheInsertCloser = tracer->openSpan("packetCacheInsert", closer.getSpanID());
       }
       dnsResponse.ids.packetCache->insert(cacheKey, zeroScope ? boost::none : dnsResponse.ids.subnet, dnsResponse.ids.cacheFlags, dnsResponse.ids.dnssecOK ? *dnsResponse.ids.dnssecOK : false, dnsResponse.ids.qname, dnsResponse.ids.qtype, dnsResponse.ids.qclass, response, dnsResponse.ids.forwardedOverUDP, dnsResponse.getHeader()->rcode, dnsResponse.ids.tempFailureTTL);
     }
@@ -590,8 +590,8 @@ bool processResponseAfterRules(PacketBuffer& response, DNSResponse& dnsResponse,
 bool processResponse(PacketBuffer& response, DNSResponse& dnsResponse, bool muted)
 {
   pdns::trace::dnsdist::Tracer::Closer closer;
-  if (dnsResponse.ids.tracingEnabled) {
-    closer = dnsResponse.ids.d_OTTracer->openSpan("processResponse");
+  if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) {
+    closer = tracer->openSpan("processResponse");
   }
 
   const auto& chains = dnsdist::configuration::getCurrentRuntimeConfiguration().d_ruleChains;
@@ -1047,7 +1047,10 @@ static bool applyRulesChainToQuery(const std::vector<dnsdist::rules::RuleAction>
 
 static bool applyRulesToQuery(DNSQuestion& dnsQuestion, const timespec& now)
 {
-  auto closer = dnsQuestion.ids.d_OTTracer->openSpan("applyRulesToQuery", dnsQuestion.ids.d_OTTracer->getLastSpanID());
+  pdns::trace::dnsdist::Tracer::Closer closer;
+  if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr) {
+    closer = tracer->openSpan("applyRulesToQuery", tracer->getLastSpanID());
+  }
   if (g_rings.shouldRecordQueries()) {
     g_rings.insertQuery(now, dnsQuestion.ids.origRemote, dnsQuestion.ids.qname, dnsQuestion.ids.qtype, dnsQuestion.getData().size(), *dnsQuestion.getHeader(), dnsQuestion.getProtocol());
   }
@@ -1453,15 +1456,18 @@ static ProcessQueryResult handleQueryTurnedIntoSelfAnsweredResponse(DNSQuestion&
 
 static ServerPolicy::SelectedBackend selectBackendForOutgoingQuery(DNSQuestion& dnsQuestion, const ServerPool& serverPool)
 {
-  auto closer = dnsQuestion.ids.d_OTTracer->openSpan("selectBackendForOutgoingQuery", dnsQuestion.ids.d_OTTracer->getLastSpanID());
+  pdns::trace::dnsdist::Tracer::Closer closer;
+  if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) {
+    closer = tracer->openSpan("selectBackendForOutgoingQuery", tracer->getLastSpanID());
+  }
 
   const auto& policy = serverPool.policy != nullptr ? *serverPool.policy : *dnsdist::configuration::getCurrentRuntimeConfiguration().d_lbPolicy;
   const auto& servers = serverPool.getServers();
   auto selectedBackend = policy.getSelectedBackend(servers, dnsQuestion);
 
-  if (dnsQuestion.ids.tracingEnabled) {
-    dnsQuestion.ids.d_OTTracer->setSpanAttribute(closer.getSpanID(), "backend.name", AnyValue{selectedBackend->getNameWithAddr()});
-    dnsQuestion.ids.d_OTTracer->setSpanAttribute(closer.getSpanID(), "backend.id", AnyValue{boost::uuids::to_string(selectedBackend->getID())});
+  if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) {
+    tracer->setSpanAttribute(closer.getSpanID(), "backend.name", AnyValue{selectedBackend->getNameWithAddr()});
+    tracer->setSpanAttribute(closer.getSpanID(), "backend.id", AnyValue{boost::uuids::to_string(selectedBackend->getID())});
   }
 
   return selectedBackend;
@@ -1779,7 +1785,10 @@ std::unique_ptr<CrossProtocolQuery> getUDPCrossProtocolQueryFromDQ(DNSQuestion&
 ProcessQueryResult processQuery(DNSQuestion& dnsQuestion, std::shared_ptr<DownstreamState>& selectedBackend)
 {
 
-  auto closer = dnsQuestion.ids.d_OTTracer->openSpan("processQuery", dnsQuestion.ids.d_OTTracer->getLastSpanID());
+  pdns::trace::dnsdist::Tracer::Closer closer;
+  if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr) {
+    closer = tracer->openSpan("processQuery", tracer->getLastSpanID());
+  }
   const uint16_t queryId = ntohs(dnsQuestion.getHeader()->id);
   try {
     /* we need an accurate ("real") value for the response and
@@ -1811,7 +1820,10 @@ ProcessQueryResult processQuery(DNSQuestion& dnsQuestion, std::shared_ptr<Downst
 
 bool assignOutgoingUDPQueryToBackend(std::shared_ptr<DownstreamState>& downstream, uint16_t queryID, DNSQuestion& dnsQuestion, PacketBuffer& query, bool actuallySend)
 {
-  auto closer = dnsQuestion.ids.d_OTTracer->openSpan("assignOutgoingUDPQueryToBackend", dnsQuestion.ids.d_OTTracer->getLastSpanID());
+  pdns::trace::dnsdist::Tracer::Closer closer;
+  if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) {
+    closer = tracer->openSpan("assignOutgoingUDPQueryToBackend", tracer->getLastSpanID());
+  }
 
   bool doh = dnsQuestion.ids.du != nullptr;
 
@@ -1887,7 +1899,10 @@ static void processUDPQuery(ClientState& clientState, const struct msghdr* msgh,
   uint16_t queryId = 0;
   InternalQueryState ids;
 
-  auto closer = ids.d_OTTracer->openSpan("processUDPQuery");
+  pdns::trace::dnsdist::Tracer::Closer closer;
+  if (auto tracer = ids.getTracer(); tracer != nullptr) {
+    closer = tracer->openSpan("processUDPQuery");
+  }
 
   ids.cs = &clientState;
   ids.origRemote = remote;
index 075c8d5fad58e45189194677c446b3c00890644e..71feeb974c9ee1c7ae8903d38dfe4e0c9739ae95 100644 (file)
@@ -7,6 +7,10 @@ OpenTelemetry Tracing
 
 Since version 2.1.0, when :program:`dnsdist` is built with ProtoBuf support, sent messages (using e.g. :func:`RemoteLogResponseAction`) can contain `OpenTelemetry traces <https://opentelemetry.io/docs/concepts/signals/traces>`__ data.
 
+To enable tracing, use :func:`setOpenTelemetryTracing(true) <setOpenTelemetryTracing>` in your configuration, or set ``logging.open_telemetry_tracing`` to ``true`` in your YAML configuration.
+It is also possible to call :func:`setOpenTelemetryTracing` at runtime.
+Once enabled, Rules can be used to turn on tracing on a per-query basis.
+
 Per-query tracing can be enabled using the :func:`SetTraceAction` or :func:`SetTraceResponseAction`. However :program:`dnsdist` captures some data before rules processing in order to have tracing information from before the rules are evaluated.
 When tracing is enabled in the query, :program:`dnsdist` stores start and end times of certain (but not all) functions that are called during the lifetime of the query and the response.
 It is recommended to send the traces out through a RemoteLogger in ResponseRules, to capture as much information as possible.
@@ -23,6 +27,8 @@ Example configuration
 
 .. code-block:: yaml
 
+   logging:
+     open_telemetry_tracing: true
    remote_logging:
      protobuf_loggers:
        - name: pblog