From 0ac246e2e848bf244506b7d6a7031fb7d0de335b Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Thu, 24 Dec 2020 11:38:26 +0100 Subject: [PATCH] dnsdist: Fix a hang when removing a server with more than one socket There was a lock starvation issue when removing a server with more than one socket in use (`sockets` greater than 1 on the corresponding `newServer` directive), because the mutex protecting the sockets array would never be released long enough by the responder thread to allow the thread stopping the server to acquire it. This commit fixes that by marking the server as stopped right away, before acquiring the lock, and also making sure that the responder thread is woken up regularly (every second, even without any query to process) and that it checks whether the server has been stopped just after that. The issue was introduced in be55a20ce9bb7140071279d70bcb460f1f2b7b7d, and backported to 1.5.1 in f0d48318cce0dd80ae73c529362bdb2921d8c5c9. --- pdns/dnsdist.cc | 8 ++++++-- pdns/dnsdistdist/dnsdist-backend.cc | 19 +++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/pdns/dnsdist.cc b/pdns/dnsdist.cc index 39cb2e197e..c6f01ee202 100644 --- a/pdns/dnsdist.cc +++ b/pdns/dnsdist.cc @@ -535,7 +535,7 @@ static void pickBackendSocketsReadyForReceiving(const std::shared_ptr lock(state->socketsLock); - state->mplexer->getAvailableFDs(ready, -1); + state->mplexer->getAvailableFDs(ready, 1000); } } @@ -555,10 +555,14 @@ try { std::vector sockets; sockets.reserve(dss->sockets.size()); - for(; !dss->isStopped(); ) { + for(;;) { dnsheader* dh = reinterpret_cast(packet); try { pickBackendSocketsReadyForReceiving(dss, sockets); + if (dss->isStopped()) { + break; + } + for (const auto& fd : sockets) { ssize_t got = recv(fd, packet, sizeof(packet), 0); char * response = packet; diff --git a/pdns/dnsdistdist/dnsdist-backend.cc b/pdns/dnsdistdist/dnsdist-backend.cc index 2eb19a8c59..091839b414 100644 --- a/pdns/dnsdistdist/dnsdist-backend.cc +++ b/pdns/dnsdistdist/dnsdist-backend.cc @@ -26,8 +26,8 @@ bool DownstreamState::reconnect() { std::unique_lock tl(connectLock, std::try_to_lock); - if (!tl.owns_lock()) { - /* we are already reconnecting */ + if (!tl.owns_lock() || isStopped()) { + /* we are already reconnecting or stopped anyway */ return false; } @@ -101,14 +101,17 @@ bool DownstreamState::reconnect() void DownstreamState::stop() { - std::unique_lock tl(connectLock); - std::lock_guard slock(socketsLock); d_stopped = true; - for (auto& fd : sockets) { - if (fd != -1) { - /* shutdown() is needed to wake up recv() in the responderThread */ - shutdown(fd, SHUT_RDWR); + { + std::lock_guard tl(connectLock); + std::lock_guard slock(socketsLock); + + for (auto& fd : sockets) { + if (fd != -1) { + /* shutdown() is needed to wake up recv() in the responderThread */ + shutdown(fd, SHUT_RDWR); + } } } } -- 2.47.2