git.ipfire.org Git - thirdparty/pdns.git/commitdiff
dnsdist: Fix tcpGaveUp and tcpDownstreamTimeout counters
author Remi Gacogne <remi.gacogne@powerdns.com>
Fri, 2 Oct 2020 15:03:06 +0000 (17:03 +0200)
committer Remi Gacogne <remi.gacogne@powerdns.com>
Tue, 10 Nov 2020 08:52:23 +0000 (09:52 +0100)
pdns/dnsdistdist/dnsdist-tcp-downstream.cc
pdns/dnsdistdist/dnsdist-tcp-downstream.hh

index 1d5271360a80cc42e1aee7ede6a2f047a0e9e5ea..8e63caf70e47114cb045dc3558a6916b89fccda8 100644 (file)
@@ -251,7 +251,8 @@ void TCPConnectionToBackend::handleIO(std::shared_ptr<TCPConnectionToBackend>& c
     if (!reconnected) {
       /* reconnect failed, we give up */
       DEBUGLOG("reconnect failed, we give up");
-      conn->notifyAllQueriesFailed(now);
+      ++conn->d_ds->tcpGaveUp;
+      conn->notifyAllQueriesFailed(now, FailureReason::gaveUp);
     }
   }
 
@@ -375,6 +376,7 @@ bool TCPConnectionToBackend::reconnect()
 
 void TCPConnectionToBackend::handleTimeout(const struct timeval& now, bool write)
 {
+  /* in some cases we could retry, here, reconnecting and sending our pending responses again */
   if (write) {
     ++d_ds->tcpWriteTimeouts;
   }
@@ -386,10 +388,10 @@ void TCPConnectionToBackend::handleTimeout(const struct timeval& now, bool write
     d_ioState->reset();
   }
 
-  notifyAllQueriesFailed(now, true);
+  notifyAllQueriesFailed(now, FailureReason::timeout);
 }
 
-void TCPConnectionToBackend::notifyAllQueriesFailed(const struct timeval& now, bool timeout)
+void TCPConnectionToBackend::notifyAllQueriesFailed(const struct timeval& now, FailureReason reason)
 {
   d_connectionDied = true;
 
@@ -404,9 +406,12 @@ void TCPConnectionToBackend::notifyAllQueriesFailed(const struct timeval& now, b
     return;
   }
 
-  if (timeout) {
+  if (reason == FailureReason::timeout) {
     ++clientConn->d_ci.cs->tcpDownstreamTimeouts;
   }
+  else if (reason == FailureReason::gaveUp) {
+    ++clientConn->d_ci.cs->tcpGaveUp;
+  }
 
   if (d_state == State::sendingQueryToBackend) {
     clientConn->notifyIOError(clientConn, std::move(d_currentQuery.d_idstate), now);
@@ -462,14 +467,14 @@ IOState TCPConnectionToBackend::handleResponse(std::shared_ptr<TCPConnectionToBa
     }
     catch (const std::exception& e) {
       DEBUGLOG("Unable to get query ID");
-      notifyAllQueriesFailed(now);
+      notifyAllQueriesFailed(now, FailureReason::unexpectedQueryID);
       throw;
     }
 
     auto it = d_pendingResponses.find(queryId);
     if (it == d_pendingResponses.end()) {
       DEBUGLOG("could not find any corresponding query for ID "<<queryId<<". This is likely a duplicated ID over the same TCP connection, giving up!");
-      notifyAllQueriesFailed(now);
+      notifyAllQueriesFailed(now, FailureReason::unexpectedQueryID);
       return IOState::Done;
     }
 
index bc8d6d442d0804610de5fe347383a9f69777fe32..fdfcfbbd86e9bcae0a86d63fa371a478243ad6a2 100644 (file)
@@ -164,6 +164,11 @@ public:
   void setProxyProtocolPayloadAdded(bool added);
 
 private:
+  /* waitingForResponseFromBackend is a state where we have not yet started reading the size,
+     so we can still switch to sending instead */
+  enum class State : uint8_t { idle, doingHandshake, sendingQueryToBackend, waitingForResponseFromBackend, readingResponseSizeFromBackend, readingResponseFromBackend };
+  enum class FailureReason : uint8_t { /* too many attempts */ gaveUp, timeout, unexpectedQueryID };
+
   static void handleIO(std::shared_ptr<TCPConnectionToBackend>& conn, const struct timeval& now);
   static void handleIOCallback(int fd, FDMultiplexer::funcparam_t& param);
   static IOState queueNextQuery(std::shared_ptr<TCPConnectionToBackend>& conn);
@@ -172,7 +177,7 @@ private:
   IOState handleResponse(std::shared_ptr<TCPConnectionToBackend>& conn, const struct timeval& now);
   uint16_t getQueryIdFromResponse();
   bool reconnect();
-  void notifyAllQueriesFailed(const struct timeval& now, bool timeout = false);
+  void notifyAllQueriesFailed(const struct timeval& now, FailureReason reason);
 
   boost::optional<struct timeval> getBackendReadTTD(const struct timeval& now) const
   {
@@ -204,9 +209,6 @@ private:
     return res;
   }
 
-  /* waitingForResponseFromBackend is a state where we have not yet started reading the size,
-     so we can still switch to sending instead */
-  enum class State { idle, doingHandshake, sendingQueryToBackend, waitingForResponseFromBackend, readingResponseSizeFromBackend, readingResponseFromBackend };
   static const uint16_t s_xfrID;
 
   std::vector<uint8_t> d_responseBuffer;