From 3594ea139e032cabf555ecde6c696e90921ce39f Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Fri, 5 Apr 2024 12:44:17 +0200 Subject: [PATCH] dnsdist: Fix a crash in the Downstream TCP handler when we are looking for an existing TCP connection to a backend to reuse, we routinely (every 60s by default) clean up existing connections from the cache. 7b5f590ee72fecf54c0c40b24e98ba03a406af53 removes a connection from the cache more aggressively when it has failed, but I did not notice that the same function might be called from the cache cleaning algorithm. It caused the cache cleanup function to call this function which in turns tried to remove the connection from the same cache, invalidating the iterator of the cache algorithm, and causing a crash when the function returned. --- pdns/dnsdistdist/dnsdist-downstream-connection.hh | 6 +++--- pdns/dnsdistdist/dnsdist-nghttp2.cc | 3 ++- pdns/dnsdistdist/dnsdist-tcp-downstream.cc | 13 +++++++------ pdns/dnsdistdist/dnsdist-tcp-downstream.hh | 4 ++-- pdns/dnsdistdist/dnsdist-tcp.cc | 4 ++-- pdns/dnsdistdist/test-dnsdist-connections-cache.cc | 2 +- 6 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pdns/dnsdistdist/dnsdist-downstream-connection.hh b/pdns/dnsdistdist/dnsdist-downstream-connection.hh index f13cbcff22..8d50de369f 100644 --- a/pdns/dnsdistdist/dnsdist-downstream-connection.hh +++ b/pdns/dnsdistdist/dnsdist-downstream-connection.hh @@ -203,7 +203,7 @@ public: if (backendIt->second.d_idles.size() >= s_maxIdleConnectionsPerDownstream) { auto old = backendIt->second.d_idles.template get().back(); - old->release(); + old->release(false); backendIt->second.d_idles.template get().pop_back(); } @@ -231,7 +231,7 @@ protected: if (entry->isIdle() && entry->getLastDataReceivedTime() < idleCutOff) { /* idle for too long */ - (*connIt)->release(); + (*connIt)->release(false); connIt = sidx.erase(connIt); continue; } @@ -242,7 +242,7 @@ protected: } if (entry->isIdle()) { - (*connIt)->release(); + (*connIt)->release(false); } connIt = sidx.erase(connIt); } diff --git a/pdns/dnsdistdist/dnsdist-nghttp2.cc b/pdns/dnsdistdist/dnsdist-nghttp2.cc index bf1666f842..68d4f92962 100644 --- a/pdns/dnsdistdist/dnsdist-nghttp2.cc +++ b/pdns/dnsdistdist/dnsdist-nghttp2.cc @@ -70,8 +70,9 @@ public: bool reachedMaxConcurrentQueries() const override; bool reachedMaxStreamID() const override; bool isIdle() const override; - void release() override + void release(bool removeFromCache) override { + (void)removeFromCache; } private: diff --git a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc index cf02303e9e..0e828939d5 100644 --- a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc +++ b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc @@ -137,7 +137,8 @@ TCPConnectionToBackend::~TCPConnectionToBackend() } } -void TCPConnectionToBackend::release(){ +void TCPConnectionToBackend::release(bool removeFromCache) +{ d_ds->outstanding -= d_pendingResponses.size(); d_pendingResponses.clear(); @@ -147,8 +148,8 @@ void TCPConnectionToBackend::release(){ d_ioState.reset(); } - auto shared = std::dynamic_pointer_cast(shared_from_this()); - if (!willBeReusable(true)) { + if (removeFromCache && !willBeReusable(true)) { + auto shared = std::dynamic_pointer_cast(shared_from_this()); /* remove ourselves from the connection cache, this might mean that our reference count drops to zero after that, so we need to be careful */ t_downstreamTCPConnectionsManager.removeDownstreamConnection(shared); @@ -371,7 +372,7 @@ void TCPConnectionToBackend::handleIO(std::shared_ptr& c catch (const std::exception& e) { vinfolog("Got an exception while handling TCP response from %s (client is %s): %s", conn->d_ds ? conn->d_ds->getNameWithAddr() : "unknown", conn->d_currentQuery.d_query.d_idstate.origRemote.toStringWithPort(), e.what()); ioGuard.release(); - conn->release(); + conn->release(true); return; } } @@ -584,7 +585,7 @@ void TCPConnectionToBackend::handleTimeout(const struct timeval& now, bool write vinfolog("Got exception while notifying a timeout"); } - release(); + release(true); } void TCPConnectionToBackend::notifyAllQueriesFailed(const struct timeval& now, FailureReason reason) @@ -647,7 +648,7 @@ void TCPConnectionToBackend::notifyAllQueriesFailed(const struct timeval& now, F vinfolog("Got exception while notifying"); } - release(); + release(true); } IOState TCPConnectionToBackend::handleResponse(std::shared_ptr& conn, const struct timeval& now) diff --git a/pdns/dnsdistdist/dnsdist-tcp-downstream.hh b/pdns/dnsdistdist/dnsdist-tcp-downstream.hh index a165dc18cb..d2447e8b9c 100644 --- a/pdns/dnsdistdist/dnsdist-tcp-downstream.hh +++ b/pdns/dnsdistdist/dnsdist-tcp-downstream.hh @@ -119,7 +119,7 @@ public: virtual bool reachedMaxStreamID() const = 0; virtual bool reachedMaxConcurrentQueries() const = 0; virtual bool isIdle() const = 0; - virtual void release() = 0; + virtual void release(bool removeFromCache) = 0; virtual void stopIO() { } @@ -256,7 +256,7 @@ public: void queueQuery(std::shared_ptr& sender, TCPQuery&& query) override; void handleTimeout(const struct timeval& now, bool write) override; - void release() override; + void release(bool removeFromCache) override; std::string toString() const override { diff --git a/pdns/dnsdistdist/dnsdist-tcp.cc b/pdns/dnsdistdist/dnsdist-tcp.cc index da7e4e2232..27b2a116e0 100644 --- a/pdns/dnsdistdist/dnsdist-tcp.cc +++ b/pdns/dnsdistdist/dnsdist-tcp.cc @@ -342,7 +342,7 @@ void IncomingTCPConnectionState::terminateClientConnection() we don't care about the ones still waiting for responses */ for (auto& backend : d_ownedConnectionsToBackend) { for (auto& conn : backend.second) { - conn->release(); + conn->release(true); } } d_ownedConnectionsToBackend.clear(); @@ -475,7 +475,7 @@ void IncomingTCPConnectionState::handleResponse(const struct timeval& now, TCPRe for (auto it = list.begin(); it != list.end(); ++it) { if (*it == response.d_connection) { try { - response.d_connection->release(); + response.d_connection->release(true); } catch (const std::exception& e) { vinfolog("Error releasing connection: %s", e.what()); diff --git a/pdns/dnsdistdist/test-dnsdist-connections-cache.cc b/pdns/dnsdistdist/test-dnsdist-connections-cache.cc index b6f86b6525..fa113aa83a 100644 --- a/pdns/dnsdistdist/test-dnsdist-connections-cache.cc +++ b/pdns/dnsdistdist/test-dnsdist-connections-cache.cc @@ -71,7 +71,7 @@ public: { } - void release() + void release(bool removeFomCache) { } -- 2.47.2