From: Remi Gacogne Date: Thu, 30 Sep 2021 13:42:25 +0000 (+0200) Subject: dnsdist: Better handling of exceptions in the TCP/DoH workers X-Git-Tag: dnsdist-1.7.0-alpha2~10^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=55a2979f0ed6128bc03b0a12139c6f8c32d87b4f;p=thirdparty%2Fpdns.git dnsdist: Better handling of exceptions in the TCP/DoH workers Exceptions might be raised when the process is exiting because the internal pipes have been closed by the remote end, for example when coverage mode is enabled. --- diff --git a/pdns/dnsdist-tcp.cc b/pdns/dnsdist-tcp.cc index 23ccce62d6..9f87070df0 100644 --- a/pdns/dnsdist-tcp.cc +++ b/pdns/dnsdist-tcp.cc @@ -1207,91 +1207,101 @@ static void tcpClientThread(int pipefd, int crossProtocolQueriesPipeFD, int cros setThreadName("dnsdist/tcpClie"); - TCPClientThreadData data; - /* this is the writing end! */ - data.crossProtocolResponsesPipe = crossProtocolResponsesWritePipeFD; - data.mplexer->addReadFD(pipefd, handleIncomingTCPQuery, &data); - data.mplexer->addReadFD(crossProtocolQueriesPipeFD, handleCrossProtocolQuery, &data); - data.mplexer->addReadFD(crossProtocolResponsesListenPipeFD, handleCrossProtocolResponse, &data); - - struct timeval now; - gettimeofday(&now, nullptr); - time_t lastTimeoutScan = now.tv_sec; - - for (;;) { - data.mplexer->run(&now); - - DownstreamConnectionsManager::cleanupClosedTCPConnections(now); - - if (now.tv_sec > lastTimeoutScan) { - lastTimeoutScan = now.tv_sec; - auto expiredReadConns = data.mplexer->getTimeouts(now, false); - for (const auto& cbData : expiredReadConns) { - if (cbData.second.type() == typeid(std::shared_ptr)) { - auto state = boost::any_cast>(cbData.second); - if (cbData.first == state->d_handler.getDescriptor()) { - vinfolog("Timeout (read) from remote TCP client %s", state->d_ci.remote.toStringWithPort()); - state->handleTimeout(state, false); - } - } - else if (cbData.second.type() == typeid(std::shared_ptr)) { - auto conn = boost::any_cast>(cbData.second); - vinfolog("Timeout (read) from remote backend %s", conn->getBackendName()); - conn->handleTimeout(now, false); - } - } - - auto expiredWriteConns = data.mplexer->getTimeouts(now, true); - for (const auto& cbData : expiredWriteConns) { - if (cbData.second.type() == typeid(std::shared_ptr)) { - auto state = boost::any_cast>(cbData.second); - if (cbData.first == state->d_handler.getDescriptor()) { - vinfolog("Timeout (write) from remote TCP client %s", state->d_ci.remote.toStringWithPort()); - state->handleTimeout(state, true); - } - } - else if (cbData.second.type() == typeid(std::shared_ptr)) { - auto conn = boost::any_cast>(cbData.second); - vinfolog("Timeout (write) from remote backend %s", conn->getBackendName()); - conn->handleTimeout(now, true); - } - } + try { + TCPClientThreadData data; + /* this is the writing end! */ + data.crossProtocolResponsesPipe = crossProtocolResponsesWritePipeFD; + data.mplexer->addReadFD(pipefd, handleIncomingTCPQuery, &data); + data.mplexer->addReadFD(crossProtocolQueriesPipeFD, handleCrossProtocolQuery, &data); + data.mplexer->addReadFD(crossProtocolResponsesListenPipeFD, handleCrossProtocolResponse, &data); - if (g_tcpStatesDumpRequested > 0) { - /* just to keep things clean in the output, debug only */ - static std::mutex s_lock; - std::lock_guard lck(s_lock); - if (g_tcpStatesDumpRequested > 0) { - /* no race here, we took the lock so it can only be increased in the meantime */ - --g_tcpStatesDumpRequested; - errlog("Dumping the TCP states, as requested:"); - data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd) - { - struct timeval lnow; - gettimeofday(&lnow, nullptr); - if (ttd.tv_sec > 0) { - errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec-lnow.tv_sec)); + struct timeval now; + gettimeofday(&now, nullptr); + time_t lastTimeoutScan = now.tv_sec; + + for (;;) { + data.mplexer->run(&now); + + try { + DownstreamConnectionsManager::cleanupClosedTCPConnections(now); + + if (now.tv_sec > lastTimeoutScan) { + lastTimeoutScan = now.tv_sec; + auto expiredReadConns = data.mplexer->getTimeouts(now, false); + for (const auto& cbData : expiredReadConns) { + if (cbData.second.type() == typeid(std::shared_ptr)) { + auto state = boost::any_cast>(cbData.second); + if (cbData.first == state->d_handler.getDescriptor()) { + vinfolog("Timeout (read) from remote TCP client %s", state->d_ci.remote.toStringWithPort()); + state->handleTimeout(state, false); + } } - else { - errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write")); + else if (cbData.second.type() == typeid(std::shared_ptr)) { + auto conn = boost::any_cast>(cbData.second); + vinfolog("Timeout (read) from remote backend %s", conn->getBackendName()); + conn->handleTimeout(now, false); } + } - if (param.type() == typeid(std::shared_ptr)) { - auto state = boost::any_cast>(param); - errlog(" - %s", state->toString()); + auto expiredWriteConns = data.mplexer->getTimeouts(now, true); + for (const auto& cbData : expiredWriteConns) { + if (cbData.second.type() == typeid(std::shared_ptr)) { + auto state = boost::any_cast>(cbData.second); + if (cbData.first == state->d_handler.getDescriptor()) { + vinfolog("Timeout (write) from remote TCP client %s", state->d_ci.remote.toStringWithPort()); + state->handleTimeout(state, true); + } } - else if (param.type() == typeid(std::shared_ptr)) { - auto conn = boost::any_cast>(param); - errlog(" - %s", conn->toString()); + else if (cbData.second.type() == typeid(std::shared_ptr)) { + auto conn = boost::any_cast>(cbData.second); + vinfolog("Timeout (write) from remote backend %s", conn->getBackendName()); + conn->handleTimeout(now, true); } - else if (param.type() == typeid(TCPClientThreadData*)) { - errlog(" - Worker thread pipe"); + } + + if (g_tcpStatesDumpRequested > 0) { + /* just to keep things clean in the output, debug only */ + static std::mutex s_lock; + std::lock_guard lck(s_lock); + if (g_tcpStatesDumpRequested > 0) { + /* no race here, we took the lock so it can only be increased in the meantime */ + --g_tcpStatesDumpRequested; + errlog("Dumping the TCP states, as requested:"); + data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd) + { + struct timeval lnow; + gettimeofday(&lnow, nullptr); + if (ttd.tv_sec > 0) { + errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec-lnow.tv_sec)); + } + else { + errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write")); + } + + if (param.type() == typeid(std::shared_ptr)) { + auto state = boost::any_cast>(param); + errlog(" - %s", state->toString()); + } + else if (param.type() == typeid(std::shared_ptr)) { + auto conn = boost::any_cast>(param); + errlog(" - %s", conn->toString()); + } + else if (param.type() == typeid(TCPClientThreadData*)) { + errlog(" - Worker thread pipe"); + } + }); } - }); + } } } + catch (const std::exception& e) { + errlog("Error in TCP worker thread: %s", e.what()); + } } } + catch (const std::exception& e) { + errlog("Fatal error in TCP worker thread: %s", e.what()); + } } /* spawn as many of these as required, they call Accept on a socket on which they will accept queries, and diff --git a/pdns/dnsdistdist/dnsdist-nghttp2.cc b/pdns/dnsdistdist/dnsdist-nghttp2.cc index 0f1e78d8fa..69c88dfd9b 100644 --- a/pdns/dnsdistdist/dnsdist-nghttp2.cc +++ b/pdns/dnsdistdist/dnsdist-nghttp2.cc @@ -1029,52 +1029,62 @@ static void dohClientThread(int crossProtocolPipeFD) { setThreadName("dnsdist/dohClie"); - DoHClientThreadData data; - data.mplexer->addReadFD(crossProtocolPipeFD, handleCrossProtocolQuery, &data); - - struct timeval now; - gettimeofday(&now, nullptr); - time_t lastTimeoutScan = now.tv_sec; - - for (;;) { - data.mplexer->run(&now); - - if (now.tv_sec > lastTimeoutScan) { - lastTimeoutScan = now.tv_sec; - - DownstreamDoHConnectionsManager::cleanupClosedConnections(now); - handleH2Timeouts(*data.mplexer, now); - - if (g_dohStatesDumpRequested > 0) { - /* just to keep things clean in the output, debug only */ - static std::mutex s_lock; - std::lock_guard lck(s_lock); - if (g_dohStatesDumpRequested > 0) { - /* no race here, we took the lock so it can only be increased in the meantime */ - --g_dohStatesDumpRequested; - errlog("Dumping the DoH client states, as requested:"); - data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd) { - struct timeval lnow; - gettimeofday(&lnow, nullptr); - if (ttd.tv_sec > 0) { - errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec - lnow.tv_sec)); - } - else { - errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write")); - } + try { + DoHClientThreadData data; + data.mplexer->addReadFD(crossProtocolPipeFD, handleCrossProtocolQuery, &data); - if (param.type() == typeid(std::shared_ptr)) { - auto conn = boost::any_cast>(param); - errlog(" - %s", conn->toString()); - } - else if (param.type() == typeid(DoHClientThreadData*)) { - errlog(" - Worker thread pipe"); + struct timeval now; + gettimeofday(&now, nullptr); + time_t lastTimeoutScan = now.tv_sec; + + for (;;) { + data.mplexer->run(&now); + + if (now.tv_sec > lastTimeoutScan) { + lastTimeoutScan = now.tv_sec; + + try { + DownstreamDoHConnectionsManager::cleanupClosedConnections(now); + handleH2Timeouts(*data.mplexer, now); + + if (g_dohStatesDumpRequested > 0) { + /* just to keep things clean in the output, debug only */ + static std::mutex s_lock; + std::lock_guard lck(s_lock); + if (g_dohStatesDumpRequested > 0) { + /* no race here, we took the lock so it can only be increased in the meantime */ + --g_dohStatesDumpRequested; + errlog("Dumping the DoH client states, as requested:"); + data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd) { + struct timeval lnow; + gettimeofday(&lnow, nullptr); + if (ttd.tv_sec > 0) { + errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec - lnow.tv_sec)); + } + else { + errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write")); + } + + if (param.type() == typeid(std::shared_ptr)) { + auto conn = boost::any_cast>(param); + errlog(" - %s", conn->toString()); + } + else if (param.type() == typeid(DoHClientThreadData*)) { + errlog(" - Worker thread pipe"); + } + }); } - }); + } + } + catch (const std::exception& e) { + errlog("Error in outgoing DoH thread: %s", e.what()); } } } } + catch (const std::exception& e) { + errlog("Fatal error in outgoing DoH thread: %s", e.what()); + } } static bool select_next_proto_callback(unsigned char** out, unsigned char* outlen, const unsigned char* in, unsigned int inlen)