From: Pieter Lexis Date: Fri, 3 Oct 2025 14:51:29 +0000 (+0200) Subject: feat(dnsdist): Add global config option for OpenTelemetry tracing X-Git-Tag: rec-5.4.0-alpha1~187^2~12 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9f84db343c8d9e47a1a871124438cc2f4e58e2a1;p=thirdparty%2Fpdns.git feat(dnsdist): Add global config option for OpenTelemetry tracing --- diff --git a/pdns/dnsdistdist/dnsdist-actions-factory.cc b/pdns/dnsdistdist/dnsdist-actions-factory.cc index d340744920..de4704a820 100644 --- a/pdns/dnsdistdist/dnsdist-actions-factory.cc +++ b/pdns/dnsdistdist/dnsdist-actions-factory.cc @@ -42,6 +42,7 @@ #include "dnstap.hh" #include "dnswriter.hh" +#include "dolog.hh" #include "ednsoptions.hh" #include "fstrm_logger.hh" #include "ipcipher.hh" @@ -1684,14 +1685,19 @@ public: { (void)ruleresult; #ifndef DISABLE_PROTOBUF + auto tracer = dnsquestion->ids.getTracer(); + if (tracer == nullptr) { + vinfolog("SetTraceAction called, but OpenTelemetry tracing is globally disabled. 
Did you forget to call setOpenTelemetryTracing?"); + return Action::None; + } if (d_value) { - dnsquestion->ids.d_OTTracer->activate(); - dnsquestion->ids.d_OTTracer->setTraceAttribute("query.qname", AnyValue{dnsquestion->ids.qname.toStringNoDot()}); - dnsquestion->ids.d_OTTracer->setTraceAttribute("query.qtype", AnyValue{QType(dnsquestion->ids.qtype).toString()}); - dnsquestion->ids.d_OTTracer->setTraceAttribute("query.remote", AnyValue{dnsquestion->ids.origRemote.toLogString()}); + tracer->activate(); + tracer->setTraceAttribute("query.qname", AnyValue{dnsquestion->ids.qname.toStringNoDot()}); + tracer->setTraceAttribute("query.qtype", AnyValue{QType(dnsquestion->ids.qtype).toString()}); + tracer->setTraceAttribute("query.remote", AnyValue{dnsquestion->ids.origRemote.toLogString()}); } else { - dnsquestion->ids.d_OTTracer->deactivate(); + tracer->deactivate(); } dnsquestion->ids.tracingEnabled = d_value; #endif diff --git a/pdns/dnsdistdist/dnsdist-configuration.hh b/pdns/dnsdistdist/dnsdist-configuration.hh index 605d735278..a08505258a 100644 --- a/pdns/dnsdistdist/dnsdist-configuration.hh +++ b/pdns/dnsdistdist/dnsdist-configuration.hh @@ -171,6 +171,7 @@ struct RuntimeConfiguration bool d_logConsoleConnections{true}; bool d_addEDNSToSelfGeneratedResponses{true}; bool d_applyACLToProxiedClients{false}; + bool d_openTelemetryTracing{false}; // XXX: It would be nice to #ifndef DISABLE_PROTOBUF, but as this is defined in dnsdist-settings-definitions.yml, we can't }; /* Be careful not to hold on this for too long, it can be invalidated diff --git a/pdns/dnsdistdist/dnsdist-idstate.hh b/pdns/dnsdistdist/dnsdist-idstate.hh index a1e5cdd18d..0afb8a89be 100644 --- a/pdns/dnsdistdist/dnsdist-idstate.hh +++ b/pdns/dnsdistdist/dnsdist-idstate.hh @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,7 @@ #include "config.h" #include "dnscrypt.hh" +#include "dnsdist-configuration.hh" #include "dnsname.hh" #include "dnsdist-protocols.hh" 
#include "ednsextendederror.hh" @@ -115,9 +117,27 @@ struct InternalQueryState // Whether or not Open Telemetry tracing is enabled for this query bool tracingEnabled = false; - - // TODO: Do we want to keep some data *without* creating a tracer for each query? - std::shared_ptr d_OTTracer{pdns::trace::dnsdist::Tracer::getTracer()}; + /** + * @brief Returns the Tracer, but only if OpenTelemetry tracing is globally enabled + * + * @return + */ + std::shared_ptr getTracer() + { +#ifdef DISABLE_PROTOBUF + return nullptr; +#else + if (dnsdist::configuration::getCurrentRuntimeConfiguration().d_openTelemetryTracing) { + if (d_OTTracer != nullptr) { + return d_OTTracer; + } + // OpenTelemetry tracing is enabled, but we don't have a tracer yet + d_OTTracer = pdns::trace::dnsdist::Tracer::getTracer(); + return d_OTTracer; + } + return nullptr; +#endif + } InternalQueryState() { @@ -193,6 +213,9 @@ struct InternalQueryState bool selfGenerated{false}; bool cacheHit{false}; bool staleCacheHit{false}; + +private: + std::shared_ptr d_OTTracer{nullptr}; }; struct IDState diff --git a/pdns/dnsdistdist/dnsdist-lua-bindings-dnsquestion.cc b/pdns/dnsdistdist/dnsdist-lua-bindings-dnsquestion.cc index c56b0c5256..3d0754e263 100644 --- a/pdns/dnsdistdist/dnsdist-lua-bindings-dnsquestion.cc +++ b/pdns/dnsdistdist/dnsdist-lua-bindings-dnsquestion.cc @@ -343,8 +343,8 @@ void setupLuaBindingsDNSQuestion([[maybe_unused]] LuaContext& luaCtx) #ifdef DISABLE_PROTOBUF return std::nullopt; #else - if (dnsQuestion.ids.tracingEnabled) { - auto traceID = dnsQuestion.ids.d_OTTracer->getTraceID(); + if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) { + auto traceID = tracer->getTraceID(); return std::string(traceID.begin(), traceID.end()); } return std::nullopt; @@ -357,8 +357,8 @@ void setupLuaBindingsDNSQuestion([[maybe_unused]] LuaContext& luaCtx) #ifdef DISABLE_PROTOBUF return std::nullopt; #else - if (dnsQuestion.ids.tracingEnabled) { - auto spanID 
= dnsQuestion.ids.d_OTTracer->getLastSpanID(); + if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) { + auto spanID = tracer->getLastSpanID(); return std::string(spanID.begin(), spanID.end()); } return std::nullopt; diff --git a/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc b/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc index dd6d63c194..5ee8730f8b 100644 --- a/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc +++ b/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc @@ -70,6 +70,7 @@ static const std::map s_booleanConfigIte {"setQueryCount", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_queryCountConfig.d_enabled = newValue; }}}, {"setVerbose", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_verbose = newValue; }}}, {"setVerboseHealthChecks", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_verboseHealthChecks = newValue; }}}, + {"setOpenTelemetryTracing", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_openTelemetryTracing = newValue; }}}, {"setServFailWhenNoServer", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_servFailOnNoPolicy = newValue; }}}, {"setRoundRobinFailOnNoServer", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_roundrobinFailOnNoServer = newValue; }}}, {"setDropEmptyQueries", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_dropEmptyQueries = newValue; }}}, diff --git a/pdns/dnsdistdist/dnsdist-protobuf.cc b/pdns/dnsdistdist/dnsdist-protobuf.cc index 785463a052..73f4e43f54 100644 --- a/pdns/dnsdistdist/dnsdist-protobuf.cc +++ b/pdns/dnsdistdist/dnsdist-protobuf.cc @@ -244,9 +244,9 @@ void DNSDistProtoBufMessage::serialize(std::string& data) const } } - if (d_dq.ids.tracingEnabled) { - 
msg.setOpenTelemetryTraceID(d_dq.ids.d_OTTracer->getTraceID()); - msg.setOpenTelemetryData(d_dq.ids.d_OTTracer->getOTProtobuf()); + if (auto tracer = d_dq.ids.getTracer(); tracer != nullptr && d_dq.ids.tracingEnabled) { + msg.setOpenTelemetryTraceID(tracer->getTraceID()); + msg.setOpenTelemetryData(tracer->getOTProtobuf()); } } diff --git a/pdns/dnsdistdist/dnsdist-settings-definitions.yml b/pdns/dnsdistdist/dnsdist-settings-definitions.yml index 5303a034b4..b0f7a70b3c 100644 --- a/pdns/dnsdistdist/dnsdist-settings-definitions.yml +++ b/pdns/dnsdistdist/dnsdist-settings-definitions.yml @@ -1821,6 +1821,13 @@ logging: - name: "structured" type: "StructuredLoggingConfiguration" default: true + - name: "open_telemetry_tracing" + type: "bool" + default: "false" + lua-name: "setOpenTelemetryTracing" + internal-field-name: "d_openTelemetryTracing" + runtime-configurable: true + description: "Set to true to enable OpenTelemetry tracing. When true, the :func:`DNSQuestion:setTrace` makes the query store tracing information (see :doc:`OpenTelemetry tracing `). When this setting is false, no tracing information is gathered at all." 
general: description: "General settings" diff --git a/pdns/dnsdistdist/dnsdist.cc b/pdns/dnsdistdist/dnsdist.cc index ebf621d03b..718e584904 100644 --- a/pdns/dnsdistdist/dnsdist.cc +++ b/pdns/dnsdistdist/dnsdist.cc @@ -457,8 +457,8 @@ static bool encryptResponse(PacketBuffer& response, size_t maximumSize, bool tcp bool applyRulesToResponse(const std::vector& respRuleActions, DNSResponse& dnsResponse) { pdns::trace::dnsdist::Tracer::Closer closer; - if (dnsResponse.ids.tracingEnabled) { - closer = dnsResponse.ids.d_OTTracer->openSpan("applyRulesToResponse", dnsResponse.ids.d_OTTracer->getLastSpanID()); + if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) { + closer = tracer->openSpan("applyRulesToResponse", tracer->getLastSpanID()); } if (respRuleActions.empty()) { return true; @@ -521,13 +521,13 @@ bool applyRulesToResponse(const std::vector& bool processResponseAfterRules(PacketBuffer& response, DNSResponse& dnsResponse, [[maybe_unused]] bool muted) { pdns::trace::dnsdist::Tracer::Closer closer; - if (dnsResponse.ids.tracingEnabled) { - closer = dnsResponse.ids.d_OTTracer->openSpan("processResponseAfterRules"); + if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) { + closer = tracer->openSpan("processResponseAfterRules"); } bool zeroScope = false; if (!fixUpResponse(response, dnsResponse.ids.qname, dnsResponse.ids.origFlags, dnsResponse.ids.ednsAdded, dnsResponse.ids.ecsAdded, dnsResponse.ids.useZeroScope ? 
&zeroScope : nullptr)) { - if (dnsResponse.ids.tracingEnabled) { - dnsResponse.ids.d_OTTracer->setSpanAttribute(closer.getSpanID(), "result", AnyValue{"fixUpResponse->false"}); + if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) { + tracer->setSpanAttribute(closer.getSpanID(), "result", AnyValue{"fixUpResponse->false"}); } return false; } @@ -556,8 +556,8 @@ bool processResponseAfterRules(PacketBuffer& response, DNSResponse& dnsResponse, } { pdns::trace::dnsdist::Tracer::Closer cacheInsertCloser; - if (dnsResponse.ids.tracingEnabled) { - cacheInsertCloser = dnsResponse.ids.d_OTTracer->openSpan("packetCacheInsert", closer.getSpanID()); + if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) { + cacheInsertCloser = tracer->openSpan("packetCacheInsert", closer.getSpanID()); } dnsResponse.ids.packetCache->insert(cacheKey, zeroScope ? boost::none : dnsResponse.ids.subnet, dnsResponse.ids.cacheFlags, dnsResponse.ids.dnssecOK ? 
*dnsResponse.ids.dnssecOK : false, dnsResponse.ids.qname, dnsResponse.ids.qtype, dnsResponse.ids.qclass, response, dnsResponse.ids.forwardedOverUDP, dnsResponse.getHeader()->rcode, dnsResponse.ids.tempFailureTTL); } @@ -590,8 +590,8 @@ bool processResponseAfterRules(PacketBuffer& response, DNSResponse& dnsResponse, bool processResponse(PacketBuffer& response, DNSResponse& dnsResponse, bool muted) { pdns::trace::dnsdist::Tracer::Closer closer; - if (dnsResponse.ids.tracingEnabled) { - closer = dnsResponse.ids.d_OTTracer->openSpan("processResponse"); + if (auto tracer = dnsResponse.ids.getTracer(); tracer != nullptr && dnsResponse.ids.tracingEnabled) { + closer = tracer->openSpan("processResponse"); } const auto& chains = dnsdist::configuration::getCurrentRuntimeConfiguration().d_ruleChains; @@ -1047,7 +1047,10 @@ static bool applyRulesChainToQuery(const std::vector static bool applyRulesToQuery(DNSQuestion& dnsQuestion, const timespec& now) { - auto closer = dnsQuestion.ids.d_OTTracer->openSpan("applyRulesToQuery", dnsQuestion.ids.d_OTTracer->getLastSpanID()); + pdns::trace::dnsdist::Tracer::Closer closer; + if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr) { + closer = tracer->openSpan("applyRulesToQuery", tracer->getLastSpanID()); + } if (g_rings.shouldRecordQueries()) { g_rings.insertQuery(now, dnsQuestion.ids.origRemote, dnsQuestion.ids.qname, dnsQuestion.ids.qtype, dnsQuestion.getData().size(), *dnsQuestion.getHeader(), dnsQuestion.getProtocol()); } @@ -1453,15 +1456,18 @@ static ProcessQueryResult handleQueryTurnedIntoSelfAnsweredResponse(DNSQuestion& static ServerPolicy::SelectedBackend selectBackendForOutgoingQuery(DNSQuestion& dnsQuestion, const ServerPool& serverPool) { - auto closer = dnsQuestion.ids.d_OTTracer->openSpan("selectBackendForOutgoingQuery", dnsQuestion.ids.d_OTTracer->getLastSpanID()); + pdns::trace::dnsdist::Tracer::Closer closer; + if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && 
dnsQuestion.ids.tracingEnabled) { + closer = tracer->openSpan("selectBackendForOutgoingQuery", tracer->getLastSpanID()); + } const auto& policy = serverPool.policy != nullptr ? *serverPool.policy : *dnsdist::configuration::getCurrentRuntimeConfiguration().d_lbPolicy; const auto& servers = serverPool.getServers(); auto selectedBackend = policy.getSelectedBackend(servers, dnsQuestion); - if (dnsQuestion.ids.tracingEnabled) { - dnsQuestion.ids.d_OTTracer->setSpanAttribute(closer.getSpanID(), "backend.name", AnyValue{selectedBackend->getNameWithAddr()}); - dnsQuestion.ids.d_OTTracer->setSpanAttribute(closer.getSpanID(), "backend.id", AnyValue{boost::uuids::to_string(selectedBackend->getID())}); + if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) { + tracer->setSpanAttribute(closer.getSpanID(), "backend.name", AnyValue{selectedBackend->getNameWithAddr()}); + tracer->setSpanAttribute(closer.getSpanID(), "backend.id", AnyValue{boost::uuids::to_string(selectedBackend->getID())}); } return selectedBackend; @@ -1779,7 +1785,10 @@ std::unique_ptr getUDPCrossProtocolQueryFromDQ(DNSQuestion& ProcessQueryResult processQuery(DNSQuestion& dnsQuestion, std::shared_ptr& selectedBackend) { - auto closer = dnsQuestion.ids.d_OTTracer->openSpan("processQuery", dnsQuestion.ids.d_OTTracer->getLastSpanID()); + pdns::trace::dnsdist::Tracer::Closer closer; + if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr) { + closer = tracer->openSpan("processQuery", tracer->getLastSpanID()); + } const uint16_t queryId = ntohs(dnsQuestion.getHeader()->id); try { /* we need an accurate ("real") value for the response and @@ -1811,7 +1820,10 @@ ProcessQueryResult processQuery(DNSQuestion& dnsQuestion, std::shared_ptr& downstream, uint16_t queryID, DNSQuestion& dnsQuestion, PacketBuffer& query, bool actuallySend) { - auto closer = dnsQuestion.ids.d_OTTracer->openSpan("assignOutgoingUDPQueryToBackend", 
dnsQuestion.ids.d_OTTracer->getLastSpanID()); + pdns::trace::dnsdist::Tracer::Closer closer; + if (auto tracer = dnsQuestion.ids.getTracer(); tracer != nullptr && dnsQuestion.ids.tracingEnabled) { + closer = tracer->openSpan("assignOutgoingUDPQueryToBackend", tracer->getLastSpanID()); + } bool doh = dnsQuestion.ids.du != nullptr; @@ -1887,7 +1899,10 @@ static void processUDPQuery(ClientState& clientState, const struct msghdr* msgh, uint16_t queryId = 0; InternalQueryState ids; - auto closer = ids.d_OTTracer->openSpan("processUDPQuery"); + pdns::trace::dnsdist::Tracer::Closer closer; + if (auto tracer = ids.getTracer(); tracer != nullptr) { + closer = tracer->openSpan("processUDPQuery"); + } ids.cs = &clientState; ids.origRemote = remote; diff --git a/pdns/dnsdistdist/docs/reference/ottrace.rst b/pdns/dnsdistdist/docs/reference/ottrace.rst index 075c8d5fad..71feeb974c 100644 --- a/pdns/dnsdistdist/docs/reference/ottrace.rst +++ b/pdns/dnsdistdist/docs/reference/ottrace.rst @@ -7,6 +7,10 @@ OpenTelemetry Tracing Since version 2.1.0, when :program:`dnsdist` is built with ProtoBuf support, sent messages (using e.g. :func:`RemoteLogResponseAction`) can contain `OpenTelemetry traces `__ data. +To enable tracing, use :func:`setOpenTelemetryTracing(true) <setOpenTelemetryTracing>` in your configuration, or set ``logging.open_telemetry_tracing`` to ``true`` in your YAML configuration. +It is also possible to call :func:`setOpenTelemetryTracing` at runtime. +Once enabled, Rules can be used to turn on tracing on a per-query basis. + Per-query tracing can be enabled using the :func:`SetTraceAction` or :func:`SetTraceResponseAction`. However :program:`dnsdist` captures some data before rules processing in order to have tracing information from before the rules are evaluated. When tracing is enabled in the query, :program:`dnsdist` stores start and end times of certain (but not all) functions that are called during the lifetime of the query and the response.
It is recommended to send the traces out through a RemoteLogger in ResponseRules, to capture as much information as possible. @@ -23,6 +27,8 @@ Example configuration .. code-block:: yaml + logging: + open_telemetry_tracing: true remote_logging: protobuf_loggers: - name: pblog