From: Remi Gacogne Date: Fri, 23 Jan 2026 16:07:11 +0000 (+0100) Subject: dnsdist: Protect more OT Tracer data behind the lock X-Git-Tag: rec-5.4.0-beta1~2^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=351c27da5302ef656b06120152b9917cd80c93ec;p=thirdparty%2Fpdns.git dnsdist: Protect more OT Tracer data behind the lock I was investigating a crash that occurred on our CI: ``` 2026-01-23T14:33:07.1755774Z === configs/dnsdist_TestOpenTelemetryTracingStripIncomingTraceParent.log === 2026-01-23T14:33:07.1757303Z msg="dnsdist 0.0.0-git1 comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2" subsystem="setup" level="0" prio="Info" ts="1769178505.183" 2026-01-23T14:33:07.1758153Z msg="Raised send buffer size" subsystem="setup" level="0" prio="Info" ts="1769178505.203" frontend.address="127.0.0.1:14303" network.send_buffer_size="212992" 2026-01-23T14:33:07.1758909Z msg="Raised receive buffer size" subsystem="setup" level="0" prio="Info" ts="1769178505.203" buffer_size="1048576" frontend.address="127.0.0.1:14303" 2026-01-23T14:33:07.1759563Z msg="Listening on Do53 frontend" subsystem="setup" level="0" prio="Info" ts="1769178505.203" frontend.address="127.0.0.1:14303" 2026-01-23T14:33:07.1760608Z msg="Allowing queries from" subsystem="setup" level="0" prio="Info" ts="1769178505.204" acl="10.0.0.0/8, 100.64.0.0/10, 127.0.0.0/8, 169.254.0.0/16, 172.16.0.0/12, 192.168.0.0/16, ::1/128, fc00::/7, fe80::/10" 2026-01-23T14:33:07.1761243Z msg="Allowing console connections from" subsystem="setup" level="0" prio="Info" ts="1769178505.204" acl="127.0.0.0/8, ::1/128" 2026-01-23T14:33:07.1762396Z msg="Setting initial status for backend" subsystem="backend" level="0" prio="Info" ts="1769178505.209" backend.address="127.0.0.1:14002" backend.health_check.status="up" backend.name="" backend.protocol="DoUDP" 2026-01-23T14:33:07.1763777Z dnsdist: ../../../../../../tmp/dnsdist-meson-dist-build/meson-dist/dnsdist-0.0.0-git1/dnsdist-opentelemetry.cc:168: void pdns::trace::dnsdist::Tracer::closeSpan(const SpanID &): Assertion `d_spanIDStack.back() == spanID' failed. ``` While trying to work out how this condition could fail, I quickly realized it was hard for me to follow which fields were protected behind a lock and which weren't, and in some cases it looked like there could be a race. Since performance is not critical in this code, and I would rather trade correctness for performance whenever possible anyway, this commit is moving all the related fields behind the lock. It might or might not fix the issue, as I haven't been able to reproduce it yet, but in any case I believe it will make it easier to reason about it. Signed-off-by: Remi Gacogne --- diff --git a/pdns/dnsdistdist/dnsdist-opentelemetry.cc b/pdns/dnsdistdist/dnsdist-opentelemetry.cc index b7232f96bc..0d7a2eb12b 100644 --- a/pdns/dnsdistdist/dnsdist-opentelemetry.cc +++ b/pdns/dnsdistdist/dnsdist-opentelemetry.cc @@ -41,38 +41,39 @@ TracesData Tracer::getTracesData() #ifdef DISABLE_PROTOBUF return 0; #else - auto otTrace = pdns::trace::TracesData{ - .resource_spans = { - {.resource = { - .attributes = { - {"service.name", {"dnsdist"}}, - }}, - .scope_spans = {{.scope = { - .name = "dnsdist/queryFromFrontend", - .version = PACKAGE_VERSION, - .attributes = {d_attributes.begin(), d_attributes.end()}, - }, - .spans = {}}}}}}; + auto traceid = getTraceID(); + { + auto data = d_data.read_only_lock(); + auto otTrace = pdns::trace::TracesData{ + .resource_spans = { + {.resource = { + .attributes = { + {"service.name", {"dnsdist"}}, + }}, + .scope_spans = {{.scope = { + .name = "dnsdist/queryFromFrontend", + .version = PACKAGE_VERSION, + .attributes = {data->d_attributes.cbegin(), data->d_attributes.cend()}, + }, + .spans = {}}}}}}; otTrace.resource_spans.at(0).scope_spans.at(0).scope.attributes.push_back(hostnameAttr); - auto traceid = getTraceID(); - - for (auto const& lockedSpan : *d_spans.read_only_lock()) { - otTrace.resource_spans.at(0).scope_spans.at(0).spans.push_back( - { - .trace_id = traceid, - .span_id = lockedSpan.span_id == d_oldAndNewRootSpanID.oldID ? d_oldAndNewRootSpanID.newID : lockedSpan.span_id, - .parent_span_id = lockedSpan.parent_span_id == d_oldAndNewRootSpanID.oldID ? d_oldAndNewRootSpanID.newID : lockedSpan.parent_span_id, - .name = lockedSpan.name, - .kind = pdns::trace::Span::SpanKind::SPAN_KIND_SERVER, - .start_time_unix_nano = lockedSpan.start_time_unix_nano, - .end_time_unix_nano = lockedSpan.end_time_unix_nano, - .attributes = lockedSpan.attributes, - }); + for (auto const& span : data->d_spans) { + otTrace.resource_spans.at(0).scope_spans.at(0).spans.push_back( + { + .trace_id = traceid, + .span_id = span.span_id == data->d_oldAndNewRootSpanID.oldID ? data->d_oldAndNewRootSpanID.newID : span.span_id, + .parent_span_id = span.parent_span_id == data->d_oldAndNewRootSpanID.oldID ? data->d_oldAndNewRootSpanID.newID : span.parent_span_id, + .name = span.name, + .kind = pdns::trace::Span::SpanKind::SPAN_KIND_SERVER, + .start_time_unix_nano = span.start_time_unix_nano, + .end_time_unix_nano = span.end_time_unix_nano, + .attributes = span.attributes, + }); + } + return otTrace; } - - return otTrace; #endif } @@ -101,16 +102,19 @@ SpanID Tracer::addSpan([[maybe_unused]] const std::string& name, [[maybe_unused] return 0; #else auto spanID = pdns::trace::SpanID::getRandomSpanID(); - d_spans.lock()->push_back({ - .name = name, - .span_id = spanID, - .parent_span_id = parentSpanID, - .start_time_unix_nano = pdns::trace::timestamp(), - .end_time_unix_nano = 0, - .attributes = {}, - }); - - d_spanIDStack.push_back(spanID); + { + auto data = d_data.lock(); + data->d_spans.push_back({ + .name = name, + .span_id = spanID, + .parent_span_id = parentSpanID, + .start_time_unix_nano = pdns::trace::timestamp(), + .end_time_unix_nano = 0, + .attributes = {}, + }); + + data->d_spanIDStack.emplace_back(spanID); + } return spanID; #endif } @@ -118,25 +122,33 @@ SpanID Tracer::addSpan([[maybe_unused]] const std::string& name, [[maybe_unused] void Tracer::setTraceID([[maybe_unused]] const TraceID& traceID) { #ifndef DISABLE_PROTOBUF - *d_traceid.lock() = traceID; + d_data.lock()->d_traceid = traceID; #endif } void Tracer::setRootSpanID([[maybe_unused]] const SpanID& spanID) { #ifndef DISABLE_PROTOBUF - SpanID oldRootSpanID; - if (auto spans = d_spans.read_only_lock(); !spans->empty()) { - auto iter = std::find_if(spans->cbegin(), spans->cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; }); - if (iter != spans->cend()) { - oldRootSpanID = iter->span_id; + SpanID oldRootSpanID{pdns::trace::s_emptySpanID}; + { + auto data = d_data.read_only_lock(); + if (const auto& spans = data->d_spans; !spans.empty()) { + auto iter = std::find_if(spans.cbegin(), spans.cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; }); + if (iter != spans.cend()) { + oldRootSpanID = iter->span_id; + } } } + if (oldRootSpanID == pdns::trace::s_emptySpanID) { return; } - d_oldAndNewRootSpanID.oldID = oldRootSpanID; - d_oldAndNewRootSpanID.newID = spanID; + + { + auto data = d_data.lock(); + data->d_oldAndNewRootSpanID.oldID = oldRootSpanID; + data->d_oldAndNewRootSpanID.newID = spanID; + } #endif } @@ -147,7 +159,7 @@ bool Tracer::setTraceAttribute([[maybe_unused]] const std::string& key, [[maybe_ // always successful return true; #else - d_attributes.push_back({key, value}); + d_data.lock()->d_attributes.push_back({key, value}); return true; #endif } @@ -155,18 +167,19 @@ bool Tracer::setTraceAttribute([[maybe_unused]] const std::string& key, [[maybe_ void Tracer::closeSpan([[maybe_unused]] const SpanID& spanID) { #ifndef DISABLE_PROTOBUF - auto lockedSpans = d_spans.lock(); + auto data = d_data.lock(); + auto& spans = data->d_spans; auto spanIt = std::find_if( - lockedSpans->rbegin(), - lockedSpans->rend(), - [spanID](const miniSpan& span) { return span.span_id == spanID; }); - if (spanIt != lockedSpans->rend() && spanIt->end_time_unix_nano == 0) { + spans.rbegin(), + spans.rend(), + [&spanID](const miniSpan& span) { return span.span_id == spanID; }); + if (spanIt != spans.rend() && spanIt->end_time_unix_nano == 0) { spanIt->end_time_unix_nano = pdns::trace::timestamp(); // Only closers are allowed, so this can never happen - assert(!d_spanIDStack.empty()); - assert(d_spanIDStack.back() == spanID); - d_spanIDStack.pop_back(); + assert(!data->d_spanIDStack.empty()); + assert(data->d_spanIDStack.back() == spanID); + data->d_spanIDStack.pop_back(); } #endif } @@ -182,11 +195,12 @@ void Tracer::setRootSpanAttribute([[maybe_unused]] const std::string& key, [[may void Tracer::setSpanAttribute([[maybe_unused]] const SpanID& spanid, [[maybe_unused]] const std::string& key, [[maybe_unused]] const AnyValue& value) { #ifndef DISABLE_PROTOBUF - auto lockedSpans = d_spans.lock(); - if (auto iter = std::find_if(lockedSpans->rbegin(), - lockedSpans->rend(), - [spanid](const auto& span) { return span.span_id == spanid; }); - iter != lockedSpans->rend()) { + auto data = d_data.lock(); + auto& spans = data->d_spans; + if (auto iter = std::find_if(spans.rbegin(), + spans.rend(), + [&spanid](const auto& span) { return span.span_id == spanid; }); + iter != spans.rend()) { iter->attributes.push_back({key, value}); } #endif @@ -197,13 +211,14 @@ SpanID Tracer::getRootSpanID() #ifdef DISABLE_PROTOBUF return 0; #else - if (d_oldAndNewRootSpanID.newID != pdns::trace::s_emptySpanID) { - return d_oldAndNewRootSpanID.newID; + auto data = d_data.read_only_lock(); + if (data->d_oldAndNewRootSpanID.newID != pdns::trace::s_emptySpanID) { + return data->d_oldAndNewRootSpanID.newID; } - if (auto spans = d_spans.read_only_lock(); !spans->empty()) { - auto iter = std::find_if(spans->cbegin(), spans->cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; }); - if (iter != spans->cend()) { + if (auto& spans = data->d_spans; !spans.empty()) { + auto iter = std::find_if(spans.cbegin(), spans.cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; }); + if (iter != spans.cend()) { return iter->span_id; } } @@ -216,13 +231,14 @@ SpanID Tracer::getLastSpanID() #ifdef DISABLE_PROTOBUF return 0; #else - if (d_spanIDStack.empty()) { + auto data = d_data.read_only_lock(); + if (data->d_spanIDStack.empty()) { return pdns::trace::s_emptySpanID; } - if (d_spanIDStack.size() == 1 && d_spanIDStack.front() == d_oldAndNewRootSpanID.oldID) { - return d_oldAndNewRootSpanID.newID; + if (data->d_spanIDStack.size() == 1 && data->d_spanIDStack.front() == data->d_oldAndNewRootSpanID.oldID) { + return data->d_oldAndNewRootSpanID.newID; } - return d_spanIDStack.back(); + return data->d_spanIDStack.back(); #endif } @@ -231,11 +247,12 @@ SpanID Tracer::getLastSpanIDForName([[maybe_unused]] const std::string& name) #ifdef DISABLE_PROTOBUF return 0; #else - if (auto lockedSpans = d_spans.read_only_lock(); !lockedSpans->empty()) { - if (auto iter = std::find_if(lockedSpans->rbegin(), - lockedSpans->rend(), + auto data = d_data.read_only_lock(); + if (auto& spans = data->d_spans; !spans.empty()) { + if (auto iter = std::find_if(spans.rbegin(), + spans.rend(), [name](const miniSpan& span) { return span.name == name; }); - iter != lockedSpans->rend()) { + iter != spans.rend()) { return iter->span_id; } } @@ -243,16 +260,17 @@ SpanID Tracer::getLastSpanIDForName([[maybe_unused]] const std::string& name) #endif } -TraceID Tracer::getTraceID() const +TraceID Tracer::getTraceID() { #ifdef DISABLE_PROTOBUF return 0; #else - auto lockedTraceID = d_traceid.lock(); - if (*lockedTraceID == pdns::trace::s_emptyTraceID) { - lockedTraceID->makeRandom(); + auto data = d_data.lock(); + auto& traceID = data->d_traceid; + if (traceID == pdns::trace::s_emptyTraceID) { + traceID.makeRandom(); } - return *lockedTraceID; + return traceID; #endif } diff --git a/pdns/dnsdistdist/dnsdist-opentelemetry.hh b/pdns/dnsdistdist/dnsdist-opentelemetry.hh index ff6d83e6ef..020806e78c 100644 --- a/pdns/dnsdistdist/dnsdist-opentelemetry.hh +++ b/pdns/dnsdistdist/dnsdist-opentelemetry.hh @@ -155,7 +155,7 @@ public: /** * @brief Retrieve the TraceID for this Tracer */ - [[nodiscard]] TraceID getTraceID() const; + [[nodiscard]] TraceID getTraceID(); /** * @brief Generate the TracesData from all data in this Tracer @@ -305,37 +305,39 @@ private: std::vector attributes; }; - /** - * @brief Stores all miniSpans. - */ - LockGuarded> d_spans; + struct Data + { + /** + * @brief Stores all miniSpans. + */ + std::vector d_spans; - /** - * @brief All attributes related to this Trace (added to the ScopeSpan) - */ - std::vector d_attributes; + /** + * @brief All attributes related to this Trace (added to the ScopeSpan) + */ + std::vector d_attributes; - /** - * @brief The TraceID for this Tracer. It is stable for the lifetime of the Tracer - * - * it is mutable because it is set the first time it is accessed - */ - mutable LockGuarded d_traceid{}; + /** + * @brief The TraceID for this Tracer. It is stable for the lifetime of the Tracer + */ + TraceID d_traceid{}; - /** - * @brief A stack of SpanID's that tracks the "stack" of SpanIDs - */ - std::vector d_spanIDStack; + /** + * @brief A stack of SpanID's that tracks the "stack" of SpanIDs + */ + std::vector d_spanIDStack; - /** - * Set when setRootSpanID is called, used to replace the - * root span id (and the parent span ids) when the PB is generated - */ - struct - { - SpanID oldID; - SpanID newID; - } d_oldAndNewRootSpanID; + /** + * Set when setRootSpanID is called, used to replace the + * root span id (and the parent span ids) when the PB is generated + */ + struct + { + SpanID oldID; + SpanID newID; + } d_oldAndNewRootSpanID; + }; + LockGuarded d_data; #endif }; } // namespace pdns::trace::dnsdist