]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
dnsdist: Better handling of exceptions in the TCP/DoH workers
authorRemi Gacogne <remi.gacogne@powerdns.com>
Thu, 30 Sep 2021 13:42:25 +0000 (15:42 +0200)
committerRemi Gacogne <remi.gacogne@powerdns.com>
Wed, 13 Oct 2021 13:19:38 +0000 (15:19 +0200)
Exceptions might be raised when the process is exiting because the
internal pipes have been closed by the remote end, for example when
coverage mode is enabled.

pdns/dnsdist-tcp.cc
pdns/dnsdistdist/dnsdist-nghttp2.cc

index 23ccce62d621e62d2d4d5b9b76533c5e17a9f7d3..9f87070df0f612693768311c9e911213cb69aa78 100644 (file)
@@ -1207,91 +1207,101 @@ static void tcpClientThread(int pipefd, int crossProtocolQueriesPipeFD, int cros
 
   setThreadName("dnsdist/tcpClie");
 
-  TCPClientThreadData data;
-  /* this is the writing end! */
-  data.crossProtocolResponsesPipe = crossProtocolResponsesWritePipeFD;
-  data.mplexer->addReadFD(pipefd, handleIncomingTCPQuery, &data);
-  data.mplexer->addReadFD(crossProtocolQueriesPipeFD, handleCrossProtocolQuery, &data);
-  data.mplexer->addReadFD(crossProtocolResponsesListenPipeFD, handleCrossProtocolResponse, &data);
-
-  struct timeval now;
-  gettimeofday(&now, nullptr);
-  time_t lastTimeoutScan = now.tv_sec;
-
-  for (;;) {
-    data.mplexer->run(&now);
-
-    DownstreamConnectionsManager::cleanupClosedTCPConnections(now);
-
-    if (now.tv_sec > lastTimeoutScan) {
-      lastTimeoutScan = now.tv_sec;
-      auto expiredReadConns = data.mplexer->getTimeouts(now, false);
-      for (const auto& cbData : expiredReadConns) {
-        if (cbData.second.type() == typeid(std::shared_ptr<IncomingTCPConnectionState>)) {
-          auto state = boost::any_cast<std::shared_ptr<IncomingTCPConnectionState>>(cbData.second);
-          if (cbData.first == state->d_handler.getDescriptor()) {
-            vinfolog("Timeout (read) from remote TCP client %s", state->d_ci.remote.toStringWithPort());
-            state->handleTimeout(state, false);
-          }
-        }
-        else if (cbData.second.type() == typeid(std::shared_ptr<TCPConnectionToBackend>)) {
-          auto conn = boost::any_cast<std::shared_ptr<TCPConnectionToBackend>>(cbData.second);
-          vinfolog("Timeout (read) from remote backend %s", conn->getBackendName());
-          conn->handleTimeout(now, false);
-        }
-      }
-
-      auto expiredWriteConns = data.mplexer->getTimeouts(now, true);
-      for (const auto& cbData : expiredWriteConns) {
-        if (cbData.second.type() == typeid(std::shared_ptr<IncomingTCPConnectionState>)) {
-          auto state = boost::any_cast<std::shared_ptr<IncomingTCPConnectionState>>(cbData.second);
-          if (cbData.first == state->d_handler.getDescriptor()) {
-            vinfolog("Timeout (write) from remote TCP client %s", state->d_ci.remote.toStringWithPort());
-            state->handleTimeout(state, true);
-          }
-        }
-        else if (cbData.second.type() == typeid(std::shared_ptr<TCPConnectionToBackend>)) {
-          auto conn = boost::any_cast<std::shared_ptr<TCPConnectionToBackend>>(cbData.second);
-          vinfolog("Timeout (write) from remote backend %s", conn->getBackendName());
-          conn->handleTimeout(now, true);
-        }
-      }
+  try {
+    TCPClientThreadData data;
+    /* this is the writing end! */
+    data.crossProtocolResponsesPipe = crossProtocolResponsesWritePipeFD;
+    data.mplexer->addReadFD(pipefd, handleIncomingTCPQuery, &data);
+    data.mplexer->addReadFD(crossProtocolQueriesPipeFD, handleCrossProtocolQuery, &data);
+    data.mplexer->addReadFD(crossProtocolResponsesListenPipeFD, handleCrossProtocolResponse, &data);
 
-      if (g_tcpStatesDumpRequested > 0) {
-        /* just to keep things clean in the output, debug only */
-        static std::mutex s_lock;
-        std::lock_guard<decltype(s_lock)> lck(s_lock);
-        if (g_tcpStatesDumpRequested > 0) {
-          /* no race here, we took the lock so it can only be increased in the meantime */
-          --g_tcpStatesDumpRequested;
-          errlog("Dumping the TCP states, as requested:");
-          data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd)
-          {
-            struct timeval lnow;
-            gettimeofday(&lnow, nullptr);
-            if (ttd.tv_sec > 0) {
-            errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec-lnow.tv_sec));
+    struct timeval now;
+    gettimeofday(&now, nullptr);
+    time_t lastTimeoutScan = now.tv_sec;
+
+    for (;;) {
+      data.mplexer->run(&now);
+
+      try {
+        DownstreamConnectionsManager::cleanupClosedTCPConnections(now);
+
+        if (now.tv_sec > lastTimeoutScan) {
+          lastTimeoutScan = now.tv_sec;
+          auto expiredReadConns = data.mplexer->getTimeouts(now, false);
+          for (const auto& cbData : expiredReadConns) {
+            if (cbData.second.type() == typeid(std::shared_ptr<IncomingTCPConnectionState>)) {
+              auto state = boost::any_cast<std::shared_ptr<IncomingTCPConnectionState>>(cbData.second);
+              if (cbData.first == state->d_handler.getDescriptor()) {
+                vinfolog("Timeout (read) from remote TCP client %s", state->d_ci.remote.toStringWithPort());
+                state->handleTimeout(state, false);
+              }
             }
-            else {
-              errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write"));
+            else if (cbData.second.type() == typeid(std::shared_ptr<TCPConnectionToBackend>)) {
+              auto conn = boost::any_cast<std::shared_ptr<TCPConnectionToBackend>>(cbData.second);
+              vinfolog("Timeout (read) from remote backend %s", conn->getBackendName());
+              conn->handleTimeout(now, false);
             }
+          }
 
-            if (param.type() == typeid(std::shared_ptr<IncomingTCPConnectionState>)) {
-              auto state = boost::any_cast<std::shared_ptr<IncomingTCPConnectionState>>(param);
-              errlog(" - %s", state->toString());
+          auto expiredWriteConns = data.mplexer->getTimeouts(now, true);
+          for (const auto& cbData : expiredWriteConns) {
+            if (cbData.second.type() == typeid(std::shared_ptr<IncomingTCPConnectionState>)) {
+              auto state = boost::any_cast<std::shared_ptr<IncomingTCPConnectionState>>(cbData.second);
+              if (cbData.first == state->d_handler.getDescriptor()) {
+                vinfolog("Timeout (write) from remote TCP client %s", state->d_ci.remote.toStringWithPort());
+                state->handleTimeout(state, true);
+              }
             }
-            else if (param.type() == typeid(std::shared_ptr<TCPConnectionToBackend>)) {
-              auto conn = boost::any_cast<std::shared_ptr<TCPConnectionToBackend>>(param);
-              errlog(" - %s", conn->toString());
+            else if (cbData.second.type() == typeid(std::shared_ptr<TCPConnectionToBackend>)) {
+              auto conn = boost::any_cast<std::shared_ptr<TCPConnectionToBackend>>(cbData.second);
+              vinfolog("Timeout (write) from remote backend %s", conn->getBackendName());
+              conn->handleTimeout(now, true);
             }
-            else if (param.type() == typeid(TCPClientThreadData*)) {
-              errlog(" - Worker thread pipe");
+          }
+
+          if (g_tcpStatesDumpRequested > 0) {
+            /* just to keep things clean in the output, debug only */
+            static std::mutex s_lock;
+            std::lock_guard<decltype(s_lock)> lck(s_lock);
+            if (g_tcpStatesDumpRequested > 0) {
+              /* no race here, we took the lock so it can only be increased in the meantime */
+              --g_tcpStatesDumpRequested;
+              errlog("Dumping the TCP states, as requested:");
+              data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd)
+              {
+                struct timeval lnow;
+                gettimeofday(&lnow, nullptr);
+                if (ttd.tv_sec > 0) {
+                  errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec-lnow.tv_sec));
+                }
+                else {
+                  errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write"));
+                }
+
+                if (param.type() == typeid(std::shared_ptr<IncomingTCPConnectionState>)) {
+                  auto state = boost::any_cast<std::shared_ptr<IncomingTCPConnectionState>>(param);
+                  errlog(" - %s", state->toString());
+                }
+                else if (param.type() == typeid(std::shared_ptr<TCPConnectionToBackend>)) {
+                  auto conn = boost::any_cast<std::shared_ptr<TCPConnectionToBackend>>(param);
+                  errlog(" - %s", conn->toString());
+                }
+                else if (param.type() == typeid(TCPClientThreadData*)) {
+                  errlog(" - Worker thread pipe");
+                }
+              });
             }
-          });
+          }
         }
       }
+      catch (const std::exception& e) {
+        errlog("Error in TCP worker thread: %s", e.what());
+      }
     }
   }
+  catch (const std::exception& e) {
+    errlog("Fatal error in TCP worker thread: %s", e.what());
+  }
 }
 
 /* spawn as many of these as required, they call Accept on a socket on which they will accept queries, and
index 0f1e78d8fac144efc048ba91bb375e4811887963..69c88dfd9bd946245968cad07597e475788caa7c 100644 (file)
@@ -1029,52 +1029,62 @@ static void dohClientThread(int crossProtocolPipeFD)
 {
   setThreadName("dnsdist/dohClie");
 
-  DoHClientThreadData data;
-  data.mplexer->addReadFD(crossProtocolPipeFD, handleCrossProtocolQuery, &data);
-
-  struct timeval now;
-  gettimeofday(&now, nullptr);
-  time_t lastTimeoutScan = now.tv_sec;
-
-  for (;;) {
-    data.mplexer->run(&now);
-
-    if (now.tv_sec > lastTimeoutScan) {
-      lastTimeoutScan = now.tv_sec;
-
-      DownstreamDoHConnectionsManager::cleanupClosedConnections(now);
-      handleH2Timeouts(*data.mplexer, now);
-
-      if (g_dohStatesDumpRequested > 0) {
-        /* just to keep things clean in the output, debug only */
-        static std::mutex s_lock;
-        std::lock_guard<decltype(s_lock)> lck(s_lock);
-        if (g_dohStatesDumpRequested > 0) {
-          /* no race here, we took the lock so it can only be increased in the meantime */
-          --g_dohStatesDumpRequested;
-          errlog("Dumping the DoH client states, as requested:");
-          data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd) {
-            struct timeval lnow;
-            gettimeofday(&lnow, nullptr);
-            if (ttd.tv_sec > 0) {
-              errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec - lnow.tv_sec));
-            }
-            else {
-              errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write"));
-            }
+  try {
+    DoHClientThreadData data;
+    data.mplexer->addReadFD(crossProtocolPipeFD, handleCrossProtocolQuery, &data);
 
-            if (param.type() == typeid(std::shared_ptr<DoHConnectionToBackend>)) {
-              auto conn = boost::any_cast<std::shared_ptr<DoHConnectionToBackend>>(param);
-              errlog(" - %s", conn->toString());
-            }
-            else if (param.type() == typeid(DoHClientThreadData*)) {
-              errlog(" - Worker thread pipe");
+    struct timeval now;
+    gettimeofday(&now, nullptr);
+    time_t lastTimeoutScan = now.tv_sec;
+
+    for (;;) {
+      data.mplexer->run(&now);
+
+      if (now.tv_sec > lastTimeoutScan) {
+        lastTimeoutScan = now.tv_sec;
+
+        try {
+          DownstreamDoHConnectionsManager::cleanupClosedConnections(now);
+          handleH2Timeouts(*data.mplexer, now);
+
+          if (g_dohStatesDumpRequested > 0) {
+            /* just to keep things clean in the output, debug only */
+            static std::mutex s_lock;
+            std::lock_guard<decltype(s_lock)> lck(s_lock);
+            if (g_dohStatesDumpRequested > 0) {
+              /* no race here, we took the lock so it can only be increased in the meantime */
+              --g_dohStatesDumpRequested;
+              errlog("Dumping the DoH client states, as requested:");
+              data.mplexer->runForAllWatchedFDs([](bool isRead, int fd, const FDMultiplexer::funcparam_t& param, struct timeval ttd) {
+                struct timeval lnow;
+                gettimeofday(&lnow, nullptr);
+                if (ttd.tv_sec > 0) {
+                  errlog("- Descriptor %d is in %s state, TTD in %d", fd, (isRead ? "read" : "write"), (ttd.tv_sec - lnow.tv_sec));
+                }
+                else {
+                  errlog("- Descriptor %d is in %s state, no TTD set", fd, (isRead ? "read" : "write"));
+                }
+
+                if (param.type() == typeid(std::shared_ptr<DoHConnectionToBackend>)) {
+                  auto conn = boost::any_cast<std::shared_ptr<DoHConnectionToBackend>>(param);
+                  errlog(" - %s", conn->toString());
+                }
+                else if (param.type() == typeid(DoHClientThreadData*)) {
+                  errlog(" - Worker thread pipe");
+                }
+              });
             }
-          });
+          }
+        }
+        catch (const std::exception& e) {
+          errlog("Error in outgoing DoH thread: %s", e.what());
         }
       }
     }
   }
+  catch (const std::exception& e) {
+    errlog("Fatal error in outgoing DoH thread: %s", e.what());
+  }
 }
 
 static bool select_next_proto_callback(unsigned char** out, unsigned char* outlen, const unsigned char* in, unsigned int inlen)