]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
dnsdist: Protect more OT Tracer data behind the lock
authorRemi Gacogne <remi.gacogne@powerdns.com>
Fri, 23 Jan 2026 16:07:11 +0000 (17:07 +0100)
committerRemi Gacogne <remi.gacogne@powerdns.com>
Fri, 23 Jan 2026 16:07:11 +0000 (17:07 +0100)
I was investigating a crash that occurred on our CI:

```
2026-01-23T14:33:07.1755774Z === configs/dnsdist_TestOpenTelemetryTracingStripIncomingTraceParent.log ===
2026-01-23T14:33:07.1757303Z msg="dnsdist 0.0.0-git1 comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it according to the terms of the GPL version 2" subsystem="setup" level="0" prio="Info" ts="1769178505.183"
2026-01-23T14:33:07.1758153Z msg="Raised send buffer size" subsystem="setup" level="0" prio="Info" ts="1769178505.203" frontend.address="127.0.0.1:14303" network.send_buffer_size="212992"
2026-01-23T14:33:07.1758909Z msg="Raised receive buffer size" subsystem="setup" level="0" prio="Info" ts="1769178505.203" buffer_size="1048576" frontend.address="127.0.0.1:14303"
2026-01-23T14:33:07.1759563Z msg="Listening on Do53 frontend" subsystem="setup" level="0" prio="Info" ts="1769178505.203" frontend.address="127.0.0.1:14303"
2026-01-23T14:33:07.1760608Z msg="Allowing queries from" subsystem="setup" level="0" prio="Info" ts="1769178505.204" acl="10.0.0.0/8, 100.64.0.0/10, 127.0.0.0/8, 169.254.0.0/16, 172.16.0.0/12, 192.168.0.0/16, ::1/128, fc00::/7, fe80::/10"
2026-01-23T14:33:07.1761243Z msg="Allowing console connections from" subsystem="setup" level="0" prio="Info" ts="1769178505.204" acl="127.0.0.0/8, ::1/128"
2026-01-23T14:33:07.1762396Z msg="Setting initial status for backend" subsystem="backend" level="0" prio="Info" ts="1769178505.209" backend.address="127.0.0.1:14002" backend.health_check.status="up" backend.name="" backend.protocol="DoUDP"
2026-01-23T14:33:07.1763777Z dnsdist: ../../../../../../tmp/dnsdist-meson-dist-build/meson-dist/dnsdist-0.0.0-git1/dnsdist-opentelemetry.cc:168: void pdns::trace::dnsdist::Tracer::closeSpan(const SpanID &): Assertion `d_spanIDStack.back() == spanID' failed.
```

While trying to work out how this condition could fail, I quickly realized
it was hard for me to follow which fields were protected behind a lock and
which weren't, and in some cases it looked like there could be a race.
Since performance is not critical in this code, and I would rather trade
correctness for performance whenever possible anyway, this commit is
moving all the related fields behind the lock.
It might or might not fix the issue, as I haven't been able to reproduce it
yet, but in any case I believe it will make it easier to reason about it.

Signed-off-by: Remi Gacogne <remi.gacogne@powerdns.com>
pdns/dnsdistdist/dnsdist-opentelemetry.cc
pdns/dnsdistdist/dnsdist-opentelemetry.hh

index b7232f96bcf9c5882e39eb7a5eb94140536dbda6..0d7a2eb12b9d15138bb5bcc19d4fcb50492c9e34 100644 (file)
@@ -41,38 +41,39 @@ TracesData Tracer::getTracesData()
 #ifdef DISABLE_PROTOBUF
   return 0;
 #else
-  auto otTrace = pdns::trace::TracesData{
-    .resource_spans = {
-      {.resource = {
-         .attributes = {
-           {"service.name", {"dnsdist"}},
-         }},
-       .scope_spans = {{.scope = {
-                          .name = "dnsdist/queryFromFrontend",
-                          .version = PACKAGE_VERSION,
-                          .attributes = {d_attributes.begin(), d_attributes.end()},
-                        },
-                        .spans = {}}}}}};
+  auto traceid = getTraceID();
+  {
+    auto data = d_data.read_only_lock();
+    auto otTrace = pdns::trace::TracesData{
+      .resource_spans = {
+        {.resource = {
+            .attributes = {
+              {"service.name", {"dnsdist"}},
+            }},
+         .scope_spans = {{.scope = {
+                .name = "dnsdist/queryFromFrontend",
+                .version = PACKAGE_VERSION,
+                .attributes = {data->d_attributes.cbegin(), data->d_attributes.cend()},
+              },
+             .spans = {}}}}}};
 
   otTrace.resource_spans.at(0).scope_spans.at(0).scope.attributes.push_back(hostnameAttr);
 
-  auto traceid = getTraceID();
-
-  for (auto const& lockedSpan : *d_spans.read_only_lock()) {
-    otTrace.resource_spans.at(0).scope_spans.at(0).spans.push_back(
-      {
-        .trace_id = traceid,
-        .span_id = lockedSpan.span_id == d_oldAndNewRootSpanID.oldID ? d_oldAndNewRootSpanID.newID : lockedSpan.span_id,
-        .parent_span_id = lockedSpan.parent_span_id == d_oldAndNewRootSpanID.oldID ? d_oldAndNewRootSpanID.newID : lockedSpan.parent_span_id,
-        .name = lockedSpan.name,
-        .kind = pdns::trace::Span::SpanKind::SPAN_KIND_SERVER,
-        .start_time_unix_nano = lockedSpan.start_time_unix_nano,
-        .end_time_unix_nano = lockedSpan.end_time_unix_nano,
-        .attributes = lockedSpan.attributes,
-      });
+    for (auto const& span : data->d_spans) {
+      otTrace.resource_spans.at(0).scope_spans.at(0).spans.push_back(
+        {
+          .trace_id = traceid,
+          .span_id = span.span_id == data->d_oldAndNewRootSpanID.oldID ? data->d_oldAndNewRootSpanID.newID : span.span_id,
+          .parent_span_id = span.parent_span_id == data->d_oldAndNewRootSpanID.oldID ? data->d_oldAndNewRootSpanID.newID : span.parent_span_id,
+          .name = span.name,
+          .kind = pdns::trace::Span::SpanKind::SPAN_KIND_SERVER,
+          .start_time_unix_nano = span.start_time_unix_nano,
+          .end_time_unix_nano = span.end_time_unix_nano,
+          .attributes = span.attributes,
+        });
+    }
+    return otTrace;
   }
-
-  return otTrace;
 #endif
 }
 
@@ -101,16 +102,19 @@ SpanID Tracer::addSpan([[maybe_unused]] const std::string& name, [[maybe_unused]
   return 0;
 #else
   auto spanID = pdns::trace::SpanID::getRandomSpanID();
-  d_spans.lock()->push_back({
-    .name = name,
-    .span_id = spanID,
-    .parent_span_id = parentSpanID,
-    .start_time_unix_nano = pdns::trace::timestamp(),
-    .end_time_unix_nano = 0,
-    .attributes = {},
-  });
-
-  d_spanIDStack.push_back(spanID);
+  {
+    auto data = d_data.lock();
+    data->d_spans.push_back({
+        .name = name,
+        .span_id = spanID,
+        .parent_span_id = parentSpanID,
+        .start_time_unix_nano = pdns::trace::timestamp(),
+        .end_time_unix_nano = 0,
+        .attributes = {},
+        });
+
+    data->d_spanIDStack.emplace_back(spanID);
+  }
   return spanID;
 #endif
 }
@@ -118,25 +122,33 @@ SpanID Tracer::addSpan([[maybe_unused]] const std::string& name, [[maybe_unused]
 void Tracer::setTraceID([[maybe_unused]] const TraceID& traceID)
 {
 #ifndef DISABLE_PROTOBUF
-  *d_traceid.lock() = traceID;
+  d_data.lock()->d_traceid = traceID;
 #endif
 }
 
 void Tracer::setRootSpanID([[maybe_unused]] const SpanID& spanID)
 {
 #ifndef DISABLE_PROTOBUF
-  SpanID oldRootSpanID;
-  if (auto spans = d_spans.read_only_lock(); !spans->empty()) {
-    auto iter = std::find_if(spans->cbegin(), spans->cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; });
-    if (iter != spans->cend()) {
-      oldRootSpanID = iter->span_id;
+  SpanID oldRootSpanID{pdns::trace::s_emptySpanID};
+  {
+    auto data = d_data.read_only_lock();
+    if (const auto& spans = data->d_spans; !spans.empty()) {
+      auto iter = std::find_if(spans.cbegin(), spans.cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; });
+      if (iter != spans.cend()) {
+        oldRootSpanID = iter->span_id;
+      }
     }
   }
+
   if (oldRootSpanID == pdns::trace::s_emptySpanID) {
     return;
   }
-  d_oldAndNewRootSpanID.oldID = oldRootSpanID;
-  d_oldAndNewRootSpanID.newID = spanID;
+
+  {
+    auto data = d_data.lock();
+    data->d_oldAndNewRootSpanID.oldID = oldRootSpanID;
+    data->d_oldAndNewRootSpanID.newID = spanID;
+  }
 #endif
 }
 
@@ -147,7 +159,7 @@ bool Tracer::setTraceAttribute([[maybe_unused]] const std::string& key, [[maybe_
   // always successful
   return true;
 #else
-  d_attributes.push_back({key, value});
+  d_data.lock()->d_attributes.push_back({key, value});
   return true;
 #endif
 }
@@ -155,18 +167,19 @@ bool Tracer::setTraceAttribute([[maybe_unused]] const std::string& key, [[maybe_
 void Tracer::closeSpan([[maybe_unused]] const SpanID& spanID)
 {
 #ifndef DISABLE_PROTOBUF
-  auto lockedSpans = d_spans.lock();
+  auto data = d_data.lock();
+  auto& spans = data->d_spans;
   auto spanIt = std::find_if(
-    lockedSpans->rbegin(),
-    lockedSpans->rend(),
-    [spanID](const miniSpan& span) { return span.span_id == spanID; });
-  if (spanIt != lockedSpans->rend() && spanIt->end_time_unix_nano == 0) {
+    spans.rbegin(),
+    spans.rend(),
+    [&spanID](const miniSpan& span) { return span.span_id == spanID; });
+  if (spanIt != spans.rend() && spanIt->end_time_unix_nano == 0) {
     spanIt->end_time_unix_nano = pdns::trace::timestamp();
 
     // Only closers are allowed, so this can never happen
-    assert(!d_spanIDStack.empty());
-    assert(d_spanIDStack.back() == spanID);
-    d_spanIDStack.pop_back();
+    assert(!data->d_spanIDStack.empty());
+    assert(data->d_spanIDStack.back() == spanID);
+    data->d_spanIDStack.pop_back();
   }
 #endif
 }
@@ -182,11 +195,12 @@ void Tracer::setRootSpanAttribute([[maybe_unused]] const std::string& key, [[may
 void Tracer::setSpanAttribute([[maybe_unused]] const SpanID& spanid, [[maybe_unused]] const std::string& key, [[maybe_unused]] const AnyValue& value)
 {
 #ifndef DISABLE_PROTOBUF
-  auto lockedSpans = d_spans.lock();
-  if (auto iter = std::find_if(lockedSpans->rbegin(),
-                               lockedSpans->rend(),
-                               [spanid](const auto& span) { return span.span_id == spanid; });
-      iter != lockedSpans->rend()) {
+  auto data = d_data.lock();
+  auto& spans = data->d_spans;
+  if (auto iter = std::find_if(spans.rbegin(),
+                               spans.rend(),
+                               [&spanid](const auto& span) { return span.span_id == spanid; });
+      iter != spans.rend()) {
     iter->attributes.push_back({key, value});
   }
 #endif
@@ -197,13 +211,14 @@ SpanID Tracer::getRootSpanID()
 #ifdef DISABLE_PROTOBUF
   return 0;
 #else
-  if (d_oldAndNewRootSpanID.newID != pdns::trace::s_emptySpanID) {
-    return d_oldAndNewRootSpanID.newID;
+  auto data = d_data.read_only_lock();
+  if (data->d_oldAndNewRootSpanID.newID != pdns::trace::s_emptySpanID) {
+    return data->d_oldAndNewRootSpanID.newID;
   }
 
-  if (auto spans = d_spans.read_only_lock(); !spans->empty()) {
-    auto iter = std::find_if(spans->cbegin(), spans->cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; });
-    if (iter != spans->cend()) {
+  if (auto& spans = data->d_spans; !spans.empty()) {
+    auto iter = std::find_if(spans.cbegin(), spans.cend(), [](const auto& span) { return span.parent_span_id == pdns::trace::s_emptySpanID; });
+    if (iter != spans.cend()) {
       return iter->span_id;
     }
   }
@@ -216,13 +231,14 @@ SpanID Tracer::getLastSpanID()
 #ifdef DISABLE_PROTOBUF
   return 0;
 #else
-  if (d_spanIDStack.empty()) {
+  auto data = d_data.read_only_lock();
+  if (data->d_spanIDStack.empty()) {
     return pdns::trace::s_emptySpanID;
   }
-  if (d_spanIDStack.size() == 1 && d_spanIDStack.front() == d_oldAndNewRootSpanID.oldID) {
-    return d_oldAndNewRootSpanID.newID;
+  if (data->d_spanIDStack.size() == 1 && data->d_spanIDStack.front() == data->d_oldAndNewRootSpanID.oldID) {
+    return data->d_oldAndNewRootSpanID.newID;
   }
-  return d_spanIDStack.back();
+  return data->d_spanIDStack.back();
 #endif
 }
 
@@ -231,11 +247,12 @@ SpanID Tracer::getLastSpanIDForName([[maybe_unused]] const std::string& name)
 #ifdef DISABLE_PROTOBUF
   return 0;
 #else
-  if (auto lockedSpans = d_spans.read_only_lock(); !lockedSpans->empty()) {
-    if (auto iter = std::find_if(lockedSpans->rbegin(),
-                                 lockedSpans->rend(),
+  auto data = d_data.read_only_lock();
+  if (auto& spans = data->d_spans; !spans.empty()) {
+    if (auto iter = std::find_if(spans.rbegin(),
+                                 spans.rend(),
                                  [name](const miniSpan& span) { return span.name == name; });
-        iter != lockedSpans->rend()) {
+        iter != spans.rend()) {
       return iter->span_id;
     }
   }
@@ -243,16 +260,17 @@ SpanID Tracer::getLastSpanIDForName([[maybe_unused]] const std::string& name)
 #endif
 }
 
-TraceID Tracer::getTraceID() const
+TraceID Tracer::getTraceID()
 {
 #ifdef DISABLE_PROTOBUF
   return 0;
 #else
-  auto lockedTraceID = d_traceid.lock();
-  if (*lockedTraceID == pdns::trace::s_emptyTraceID) {
-    lockedTraceID->makeRandom();
+  auto data = d_data.lock();
+  auto& traceID = data->d_traceid;
+  if (traceID == pdns::trace::s_emptyTraceID) {
+    traceID.makeRandom();
   }
-  return *lockedTraceID;
+  return traceID;
 #endif
 }
 
index ff6d83e6ef9c0471223ae2123b7a46f1cc4ea2fe..020806e78c17a9af4b92305cba6e7b1b8c4f7e0f 100644 (file)
@@ -155,7 +155,7 @@ public:
   /**
    * @brief Retrieve the TraceID for this Tracer
    */
-  [[nodiscard]] TraceID getTraceID() const;
+  [[nodiscard]] TraceID getTraceID();
 
   /**
    * @brief Generate the TracesData from all data in this Tracer
@@ -305,37 +305,39 @@ private:
     std::vector<pdns::trace::KeyValue> attributes;
   };
 
-  /**
-   * @brief Stores all miniSpans.
-   */
-  LockGuarded<std::vector<miniSpan>> d_spans;
+  struct Data
+  {
+    /**
+     * @brief Stores all miniSpans.
+     */
+    std::vector<miniSpan> d_spans;
 
-  /**
-   * @brief All attributes related to this Trace (added to the ScopeSpan)
-   */
-  std::vector<pdns::trace::KeyValue> d_attributes;
+    /**
+     * @brief All attributes related to this Trace (added to the ScopeSpan)
+     */
+    std::vector<pdns::trace::KeyValue> d_attributes;
 
-  /**
-   * @brief The TraceID for this Tracer. It is stable for the lifetime of the Tracer
-   *
-   * it is mutable because it is set the first time it is accessed
-   */
-  mutable LockGuarded<TraceID> d_traceid{};
+    /**
+     * @brief The TraceID for this Tracer. It is stable for the lifetime of the Tracer
+     */
+    TraceID d_traceid{};
 
-  /**
-   * @brief A stack of SpanID's that tracks the "stack" of SpanIDs
-   */
-  std::vector<SpanID> d_spanIDStack;
+    /**
+     * @brief A stack of SpanID's that tracks the "stack" of SpanIDs
+     */
+    std::vector<SpanID> d_spanIDStack;
 
-  /**
-   * Set when setRootSpanID is called, used to replace the
-   * root span id (and the parent span ids) when the PB is generated
-   */
-  struct
-  {
-    SpanID oldID;
-    SpanID newID;
-  } d_oldAndNewRootSpanID;
+    /**
+     * Set when setRootSpanID is called, used to replace the
+     * root span id (and the parent span ids) when the PB is generated
+     */
+    struct
+    {
+      SpanID oldID;
+      SpanID newID;
+    } d_oldAndNewRootSpanID;
+  };
+  LockGuarded<Data> d_data;
 #endif
 };
 } // namespace pdns::trace::dnsdist