From: Otto Moerbeek Date: Tue, 20 May 2025 12:23:15 +0000 (+0200) Subject: Framework to add opentelemetry trace data in rec X-Git-Tag: rec-5.3.0-alpha1^2~16 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=14944645662e5c87b0827498724bcaf06d46a6e1;p=thirdparty%2Fpdns.git Framework to add opentelemetry trace data in rec Signed-off-by: Otto Moerbeek --- diff --git a/pdns/dns_random.hh b/pdns/dns_random.hh index c3bd314d16..ec32859c02 100644 --- a/pdns/dns_random.hh +++ b/pdns/dns_random.hh @@ -41,6 +41,11 @@ inline uint16_t dns_random_uint16() return arc4random() & 0xffff; } +inline void dns_random(void* buf, size_t size) +{ + arc4random_buf(buf, size); +} + namespace pdns { struct dns_random_engine diff --git a/pdns/dnsmessage.proto b/pdns/dnsmessage.proto index f03c633be7..465b13b875 100644 --- a/pdns/dnsmessage.proto +++ b/pdns/dnsmessage.proto @@ -190,6 +190,7 @@ message PBDNSMessage { optional uint32 outgoingQueries = 27; // Number of outgoing queries used to answer the query optional uint32 headerFlags = 28; // Flags field in wire format, 16 bits used optional uint32 ednsVersion = 29; // EDNS version and flags in wire format, see https://www.rfc-editor.org/rfc/rfc6891.html#section-6.1.3 + optional bytes openTelemetryData = 30; // Protobuf encoded OPen Telemetry Data, see https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/trace/v1/trace.proto } message PBDNSMessageList { diff --git a/pdns/protozero.hh b/pdns/protozero.hh index baac7c87f9..3b42e5fc74 100644 --- a/pdns/protozero.hh +++ b/pdns/protozero.hh @@ -103,6 +103,7 @@ namespace ProtoZero outgoingQueries = 27, headerFlags = 28, ednsVersion = 29, + openTelemetryData = 30, }; enum class QuestionField : protozero::pbf_tag_type { @@ -314,6 +315,13 @@ namespace ProtoZero add_uint32(d_message, Field::ednsVersion, version); } + void setOpenTelemetryData(const std::string& data) + { + if (!data.empty()) { + add_string(d_message, Field::openTelemetryData, data); + } + } + void startResponse() { d_response = protozero::pbf_writer{d_message, static_cast(Field::response)}; diff --git a/pdns/recursordist/decode_ot.py b/pdns/recursordist/decode_ot.py index 4970a10c48..a55d38cf0c 100755 --- a/pdns/recursordist/decode_ot.py +++ b/pdns/recursordist/decode_ot.py @@ -10,7 +10,7 @@ import sys import google.protobuf.message import google.protobuf.json_format -import opentelemetry.proto.trace.v1.trace_pb2 + data = sys.stdin.buffer.read() diff --git a/pdns/recursordist/pdns_recursor.cc b/pdns/recursordist/pdns_recursor.cc index 98ad79f2f9..4657ecebc5 100644 --- a/pdns/recursordist/pdns_recursor.cc +++ b/pdns/recursordist/pdns_recursor.cc @@ -1072,6 +1072,7 @@ void startDoResolve(void* arg) // NOLINT(readability-function-cognitive-complexi bool seenAuthSOA = false; resolver.d_eventTrace = std::move(comboWriter->d_eventTrace); + resolver.d_otTrace = std::move(comboWriter->d_otTrace); resolver.setId(g_multiTasker->getTid()); bool DNSSECOK = false; @@ -1873,6 +1874,14 @@ void startDoResolve(void* arg) // NOLINT(readability-function-cognitive-complexi if (resolver.d_eventTrace.enabled() && (SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_pb) != 0) { pbMessage.addEvents(resolver.d_eventTrace); } + if (resolver.d_eventTrace.enabled() && (SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_ot) != 0) { + resolver.d_otTrace.close(); + auto spans = resolver.d_eventTrace.convertToOT(resolver.d_otTrace); + pdns::trace::TracesData otTrace{ + .resource_spans = { pdns::trace::ResourceSpans{.resource = {}, .scope_spans = {{.spans = spans}}}}}; + string otData = otTrace.encode(); + pbMessage.setOpenTelemetryData(otData); + } if (comboWriter->d_logResponse) { protobufLogResponse(pbMessage); } @@ -2170,7 +2179,7 @@ bool expectProxyProtocol(const ComboAddress& from, const ComboAddress& listenAdd // source: the address we assume the query is coming from, might be set by proxy protocol // destination: the address we assume the query was sent to, might be set by proxy protocol // mappedSource: the address we assume the query is coming from. Differs from source if table based mapping has been applied -static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, ComboAddress source, ComboAddress destination, const ComboAddress& mappedSource, struct timeval tval, int fileDesc, std::vector& proxyProtocolValues, RecEventTrace& eventTrace) // NOLINT(readability-function-cognitive-complexity): https://github.com/PowerDNS/pdns/issues/12791 +static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, ComboAddress source, ComboAddress destination, const ComboAddress& mappedSource, struct timeval tval, int fileDesc, std::vector& proxyProtocolValues, RecEventTrace& eventTrace, pdns::trace::Span& otTrace) // NOLINT(readability-function-cognitive-complexity): https://github.com/PowerDNS/pdns/issues/12791 { RecThreadInfo::self().incNumberOfDistributedQueries(); gettimeofday(&g_now, nullptr); @@ -2330,7 +2339,7 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr eventTrace.add(RecEventTrace::AnswerSent); if (t_protobufServers.servers && logResponse && (!luaconfsLocal->protobufExportConfig.taggedOnly || (pbData && pbData->d_tagged))) { - protobufLogResponse(dnsheader, luaconfsLocal, pbData, tval, false, source, destination, mappedSource, ednssubnet, uniqueId, requestorId, deviceId, deviceName, meta, eventTrace, policyTags); + protobufLogResponse(dnsheader, luaconfsLocal, pbData, tval, false, source, destination, mappedSource, ednssubnet, uniqueId, requestorId, deviceId, deviceName, meta, eventTrace, otTrace, policyTags); } if (eventTrace.enabled() && (SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_log) != 0) { @@ -2442,6 +2451,8 @@ static string* doProcessUDPQuestion(const std::string& question, const ComboAddr comboWriter->d_meta = std::move(meta); comboWriter->d_eventTrace = std::move(eventTrace); + comboWriter->d_otTrace = std::move(otTrace); + g_multiTasker->makeThread(startDoResolve, (void*)comboWriter.release()); // deletes dc return nullptr; @@ -2461,6 +2472,7 @@ static void handleNewUDPQuestion(int fileDesc, FDMultiplexer::funcparam_t& /* va bool firstQuery = true; std::vector proxyProtocolValues; RecEventTrace eventTrace; + pdns::trace::Span otTrace; for (size_t queriesCounter = 0; queriesCounter < g_maxUDPQueriesPerRound; queriesCounter++) { bool proxyProto = false; @@ -2472,8 +2484,24 @@ static void handleNewUDPQuestion(int fileDesc, FDMultiplexer::funcparam_t& /* va if (ssize_t len = recvmsg(fileDesc, &msgh, 0); len >= 0) { eventTrace.clear(); eventTrace.setEnabled(SyncRes::s_event_trace_enabled != 0); + // evenTrace use monotonic time, while OpenTelemetry uses absolute time. setEnabled() + // estabslished the reference point, get an absolute TS as close as possible to the + // eventTrace start of trace time. + auto traceTS = pdns::trace::timestamp(); eventTrace.add(RecEventTrace::ReqRecv); - + if ((SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_ot) != 0) { + otTrace.start_time_unix_nano = traceTS; + pdns::trace::TraceID traceid; + pdns::trace::random(traceid); + pdns::trace::SpanID spanid; + pdns::trace::random(spanid); + pdns::trace::SpanID parent; + pdns::trace::reset(parent); + otTrace.name = "RecRequest"; + otTrace.trace_id = traceid; + otTrace.span_id = spanid; + otTrace.parent_span_id = parent; + } firstQuery = false; if ((msgh.msg_flags & MSG_TRUNC) != 0) { @@ -2632,12 +2660,12 @@ static void handleNewUDPQuestion(int fileDesc, FDMultiplexer::funcparam_t& /* va if (RecThreadInfo::weDistributeQueries()) { std::string localdata = data; - distributeAsyncFunction(data, [localdata = std::move(localdata), fromaddr, destaddr, source, destination, mappedSource, tval, fileDesc, proxyProtocolValues, eventTrace]() mutable { - return doProcessUDPQuestion(localdata, fromaddr, destaddr, source, destination, mappedSource, tval, fileDesc, proxyProtocolValues, eventTrace); + distributeAsyncFunction(data, [localdata = std::move(localdata), fromaddr, destaddr, source, destination, mappedSource, tval, fileDesc, proxyProtocolValues, eventTrace, otTrace]() mutable { + return doProcessUDPQuestion(localdata, fromaddr, destaddr, source, destination, mappedSource, tval, fileDesc, proxyProtocolValues, eventTrace, otTrace); }); } else { - doProcessUDPQuestion(data, fromaddr, destaddr, source, destination, mappedSource, tval, fileDesc, proxyProtocolValues, eventTrace); + doProcessUDPQuestion(data, fromaddr, destaddr, source, destination, mappedSource, tval, fileDesc, proxyProtocolValues, eventTrace, otTrace); } } } diff --git a/pdns/recursordist/protozero-trace.hh b/pdns/recursordist/protozero-trace.hh index e2b6244cbe..6cc4e11cb0 100644 --- a/pdns/recursordist/protozero-trace.hh +++ b/pdns/recursordist/protozero-trace.hh @@ -29,6 +29,8 @@ #include #include +#include "dns_random.hh" + // See https://github.com/open-telemetry/opentelemetry-proto/tree/main/opentelemetry/proto namespace pdns::trace @@ -220,6 +222,26 @@ struct InstrumentationScope using TraceID = std::array; using SpanID = std::array; +inline void random(TraceID& trace) +{ + dns_random(trace.data(), trace.size()); +} + +inline void random(SpanID& span) +{ + dns_random(span.data(), span.size()); +} + +inline void reset(TraceID& trace) +{ + memset(trace.data(), 0, trace.size()); +} + +inline void reset(SpanID& span) +{ + memset(span.data(), 0, span.size()); +} + inline void encode(protozero::pbf_writer& writer, uint8_t field, const TraceID& value) { writer.add_bytes(field, reinterpret_cast(value.data()), value.size()); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) it's the API @@ -273,6 +295,13 @@ struct Status static Status decode(protozero::pbf_reader& reader); }; +inline uint64_t timestamp() +{ + timespec now{}; + clock_gettime(CLOCK_REALTIME, &now); + return (1000000000ULL* now.tv_sec) + now.tv_nsec; +} + struct Span { // A unique identifier for a trace. All spans from the same trace share @@ -394,7 +423,7 @@ struct Span std::vector events; // = 11 // dropped_events_count is the number of dropped events. If the value is 0, then no // events were dropped. - uint32_t dropped_events_count; // = 12 + uint32_t dropped_events_count{0}; // = 12 // A pointer from the current span to another span in the same trace or in a // different trace. For example, this can be used in batching operations, @@ -465,6 +494,10 @@ struct Span // [Optional]. uint32_t flags{0}; // = 16; + void close() + { + end_time_unix_nano = timestamp(); + } void encode(protozero::pbf_writer& writer) const; static Span decode(protozero::pbf_reader& reader); }; @@ -514,6 +547,12 @@ struct ScopeSpans // This schema_url applies to all spans and span events in the "spans" field. std::string schema_url; // = 3 + void close() + { + for (auto& element : spans) { + element.close(); + } + } void encode(protozero::pbf_writer& writer) const; static ScopeSpans decode(protozero::pbf_reader& reader); }; @@ -534,6 +573,12 @@ struct ResourceSpans // to the data in the "scope_spans" field which have their own schema_url field. std::string schema_url; // = 3 + void close() + { + for (auto& element : scope_spans) { + element.close(); + } + } void encode(protozero::pbf_writer& writer) const; static ResourceSpans decode(protozero::pbf_reader& reader); }; @@ -557,8 +602,22 @@ struct TracesData // array will contain multiple elements. std::vector resource_spans; // = 1 + void close() + { + for (auto& element : resource_spans) { + element.close(); + } + } void encode(protozero::pbf_writer& writer) const; static TracesData decode(protozero::pbf_reader& reader); + + [[nodiscard]] std::string encode() const { + std::string data; + protozero::pbf_writer writer{data}; + encode(writer); + return data; + } + }; inline ArrayValue ArrayValue::decode(protozero::pbf_reader& reader) diff --git a/pdns/recursordist/rec-eventtrace.cc b/pdns/recursordist/rec-eventtrace.cc index beceebf1e7..38f034a0cb 100644 --- a/pdns/recursordist/rec-eventtrace.cc +++ b/pdns/recursordist/rec-eventtrace.cc @@ -39,3 +39,56 @@ const std::unordered_map RecEventTrace::s NameEntry(LuaNoData), NameEntry(LuaNXDomain), NameEntry(LuaPostResolveFFI)}; + +using namespace pdns::trace; + +std::vector RecEventTrace::convertToOT(const Span& span) const +{ + timespec realtime{}; + clock_gettime(CLOCK_REALTIME, &realtime); + timespec monotime{}; + clock_gettime(CLOCK_MONOTONIC, &monotime); + auto diff = (1000000000ULL * realtime.tv_sec) + realtime.tv_nsec - ((1000000000ULL * monotime.tv_sec) + monotime.tv_nsec); + diff += d_base; + + std::vector ret; + ret.reserve((d_events.size() / 2) + 1); + ret.emplace_back(span); + std::map pairs; + for (const auto& event : d_events) { + if (event.d_start) { + Span work{ + .name = RecEventTrace::toString(event.d_event), + .trace_id = span.trace_id, + .parent_span_id = span.span_id, + .start_time_unix_nano = static_cast(event.d_ts + diff), + .end_time_unix_nano = static_cast(event.d_ts + diff), + }; + random(work.span_id); + ret.emplace_back(work); + pairs[event.d_event] = ret.size() - 1; + } + else { + if (auto startEvent = pairs.find(event.d_event); startEvent != pairs.end()) { + auto& work = ret.at(startEvent->second); + if (!std::holds_alternative(event.d_value)) { + if (std::holds_alternative(event.d_value)) { + work.attributes.emplace_back(KeyValue{"value", {std::get(event.d_value)}}); + } + else if (std::holds_alternative(event.d_value)) { + work.attributes.emplace_back(KeyValue{"value", {std::get(event.d_value)}}); + } + else if (std::holds_alternative(event.d_value)) { + work.attributes.emplace_back(KeyValue{"value", {std::get(event.d_value)}}); + } + else { + work.attributes.emplace_back(KeyValue{"value", {toString(event.d_value)}}); + } + } + work.end_time_unix_nano = static_cast(event.d_ts + diff); + pairs.erase(event.d_event); + } + } + } + return ret; +} diff --git a/pdns/recursordist/rec-eventtrace.hh b/pdns/recursordist/rec-eventtrace.hh index 157e306d6f..49bee212dc 100644 --- a/pdns/recursordist/rec-eventtrace.hh +++ b/pdns/recursordist/rec-eventtrace.hh @@ -29,6 +29,7 @@ #include #include #include +#include "protozero-trace.hh" class RecEventTrace { @@ -259,6 +260,8 @@ public: return d_events; } + std::vector convertToOT(const pdns::trace::Span& span) const; + private: std::vector d_events; int64_t d_base; diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc index 64c8e9fe6e..ede884ffae 100644 --- a/pdns/recursordist/rec-main.cc +++ b/pdns/recursordist/rec-main.cc @@ -602,6 +602,7 @@ void protobufLogResponse(const struct dnsheader* header, LocalStateHolder& meta, const RecEventTrace& eventTrace, + pdns::trace::Span &otTrace, const std::unordered_set& policyTags) { pdns::ProtoZero::RecMessage pbMessage(pbData ? pbData->d_message : "", pbData ? pbData->d_response : "", 64, 10); // The extra bytes we are going to add @@ -662,6 +663,13 @@ void protobufLogResponse(const struct dnsheader* header, LocalStateHolder& meta, const RecEventTrace& eventTrace, + pdns::trace::Span& otTrace, const std::unordered_set& policyTags); void requestWipeCaches(const DNSName& canon); void startDoResolve(void*); diff --git a/pdns/recursordist/rec-tcp.cc b/pdns/recursordist/rec-tcp.cc index 0bea53d916..2dd833fad1 100644 --- a/pdns/recursordist/rec-tcp.cc +++ b/pdns/recursordist/rec-tcp.cc @@ -461,7 +461,7 @@ static void doProcessTCPQuestion(std::unique_ptr& comboWriter, s if (t_protobufServers.servers && comboWriter->d_logResponse && (!luaconfsLocal->protobufExportConfig.taggedOnly || (pbData && pbData->d_tagged))) { struct timeval tval{ 0, 0}; - protobufLogResponse(dnsheader, luaconfsLocal, pbData, tval, true, comboWriter->d_source, comboWriter->d_destination, comboWriter->d_mappedSource, comboWriter->d_ednssubnet, comboWriter->d_uuid, comboWriter->d_requestorId, comboWriter->d_deviceId, comboWriter->d_deviceName, comboWriter->d_meta, comboWriter->d_eventTrace, comboWriter->d_policyTags); + protobufLogResponse(dnsheader, luaconfsLocal, pbData, tval, true, comboWriter->d_source, comboWriter->d_destination, comboWriter->d_mappedSource, comboWriter->d_ednssubnet, comboWriter->d_uuid, comboWriter->d_requestorId, comboWriter->d_deviceId, comboWriter->d_deviceName, comboWriter->d_meta, comboWriter->d_eventTrace, comboWriter->d_otTrace, comboWriter->d_policyTags); } if (comboWriter->d_eventTrace.enabled() && (SyncRes::s_event_trace_enabled & SyncRes::event_trace_to_log) != 0) { diff --git a/pdns/recursordist/syncres.hh b/pdns/recursordist/syncres.hh index 61ee00bde1..0e57a26ec6 100644 --- a/pdns/recursordist/syncres.hh +++ b/pdns/recursordist/syncres.hh @@ -54,6 +54,7 @@ #include "logr.hh" #include "rec-tcounters.hh" #include "ednsextendederror.hh" +#include "protozero-trace.hh" #ifdef HAVE_CONFIG_H #include "config.h" @@ -568,6 +569,7 @@ public: static const int event_trace_to_pb = 1; static const int event_trace_to_log = 2; + static const int event_trace_to_ot = 4; static int s_event_trace_enabled; static bool s_save_parent_ns_set; static bool s_addExtendedResolutionDNSErrors; @@ -578,6 +580,7 @@ public: boost::optional d_routingTag; ComboAddress d_fromAuthIP; RecEventTrace d_eventTrace; + pdns::trace::Span d_otTrace; std::shared_ptr d_slog = g_slog->withName("syncres"); boost::optional d_extendedError;