From: Remi Gacogne Date: Mon, 8 Feb 2021 16:44:11 +0000 (+0100) Subject: dnsdist: Fix exceptions handling in TCP/DoT worker threads X-Git-Tag: dnsdist-1.6.0-alpha2~11^2~27 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9c2ca55ffb6659810674afec5c26b9774f85a506;p=thirdparty%2Fpdns.git dnsdist: Fix exceptions handling in TCP/DoT worker threads --- diff --git a/pdns/dnsdist-tcp.cc b/pdns/dnsdist-tcp.cc index d3e3626e9e..7e159311a0 100644 --- a/pdns/dnsdist-tcp.cc +++ b/pdns/dnsdist-tcp.cc @@ -423,19 +423,42 @@ IOState IncomingTCPConnectionState::sendResponse(std::shared_ptrd_currentPos = 0; state->d_currentResponse = std::move(response); - auto iostate = state->d_handler.tryWrite(state->d_currentResponse.d_buffer, state->d_currentPos, state->d_currentResponse.d_buffer.size()); - if (iostate == IOState::Done) { - DEBUGLOG("response sent"); - if (!handleResponseSent(state, now)) { - return IOState::Done; + try { + auto iostate = state->d_handler.tryWrite(state->d_currentResponse.d_buffer, state->d_currentPos, state->d_currentResponse.d_buffer.size()); + if (iostate == IOState::Done) { + DEBUGLOG("response sent"); + if (!handleResponseSent(state, now)) { + return IOState::Done; + } + return sendQueuedResponses(state, now); + } else { + return IOState::NeedWrite; + DEBUGLOG("partial write"); } - return sendQueuedResponses(state, now); - } else { - return IOState::NeedWrite; - DEBUGLOG("partial write"); + } + catch (const std::exception& e) { + vinfolog("Closing TCP client connection with %s: %s", state->d_ci.remote.toStringWithPort(), e.what()); + DEBUGLOG("Closing TCP client connection: "<d_ci.cs->tcpDiedSendingResponse; + + state->terminateClientConnection(); + + return IOState::Done; } } +void IncomingTCPConnectionState::terminateClientConnection() +{ + d_queuedResponses.clear(); + /* we have already released idle connections that could be reused, + we don't care about the ones still waiting for responses */ + d_activeConnectionsToBackend.clear(); + /* meaning we will no longer be 'active' when the backend + response or timeout comes in */ + d_ioState->reset(); + d_handler.close(); +} + /* called when handling a response or error coming from a backend */ void IncomingTCPConnectionState::sendOrQueueResponse(std::shared_ptr& state, const struct timeval& now, TCPResponse&& response) { @@ -465,8 +488,13 @@ void IncomingTCPConnectionState::handleResponse(std::shared_ptrrelease(); - DownstreamConnectionsManager::releaseDownstreamConnection(std::move(*it)); + try { + response.d_connection->release(); + DownstreamConnectionsManager::releaseDownstreamConnection(std::move(*it)); + } + catch (const std::exception& e) { + vinfolog("Error releasing connection: %s", e.what()); + } list.erase(it); break; } @@ -670,8 +698,8 @@ static IOState handleQuery(std::shared_ptr& state, c void IncomingTCPConnectionState::handleIOCallback(int fd, FDMultiplexer::funcparam_t& param) { auto conn = boost::any_cast>(param); - if (fd != conn->d_ci.fd) { - throw std::runtime_error("Unexpected socket descriptor " + std::to_string(fd) + " received in " + std::string(__PRETTY_FUNCTION__) + ", expected " + std::to_string(conn->d_ci.fd)); + if (fd != conn->d_handler.getDescriptor()) { + throw std::runtime_error("Unexpected socket descriptor " + std::to_string(fd) + " received in " + std::string(__PRETTY_FUNCTION__) + ", expected " + std::to_string(conn->d_handler.getDescriptor())); } struct timeval now; @@ -913,7 +941,12 @@ void IncomingTCPConnectionState::notifyIOError(std::shared_ptrd_queuedResponses.front()); state->d_queuedResponses.pop_front(); state->d_state = IncomingTCPConnectionState::State::idle; - sendOrQueueResponse(state, now, std::move(resp)); + try { + sendOrQueueResponse(state, now, std::move(resp)); + } + catch (const std::exception& e) { + vinfolog("exception in notifyIOError: %s", e.what()); + } } else { // the backend code already tried to reconnect if it was possible @@ -1041,7 +1074,7 @@ static void tcpClientThread(int pipefd) for (const auto& cbData : expiredReadConns) { if (cbData.second.type() == typeid(std::shared_ptr)) { auto state = boost::any_cast>(cbData.second); - if (cbData.first == state->d_ci.fd) { + if (cbData.first == state->d_handler.getDescriptor()) { vinfolog("Timeout (read) from remote TCP client %s", state->d_ci.remote.toStringWithPort()); state->handleTimeout(state, false); } @@ -1057,7 +1090,7 @@ static void tcpClientThread(int pipefd) for (const auto& cbData : expiredWriteConns) { if (cbData.second.type() == typeid(std::shared_ptr)) { auto state = boost::any_cast>(cbData.second); - if (cbData.first == state->d_ci.fd) { + if (cbData.first == state->d_handler.getDescriptor()) { vinfolog("Timeout (write) from remote TCP client %s", state->d_ci.remote.toStringWithPort()); state->handleTimeout(state, true); } diff --git a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc index a66469f3ff..645abbd299 100644 --- a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc +++ b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc @@ -25,6 +25,13 @@ void TCPConnectionToBackend::assignToClientConnection(std::shared_ptroutstanding -= d_pendingResponses.size(); + } + + d_pendingResponses.clear(); + d_pendingQueries.clear(); + d_clientConn.reset(); if (d_ioState) { d_ioState.reset(); @@ -169,6 +176,9 @@ void TCPConnectionToBackend::handleIO(std::shared_ptr& c } catch (const std::exception& e) { vinfolog("Got an exception while handling TCP response from %s (client is %s): %s", conn->d_ds ? conn->d_ds->getName() : "unknown", conn->d_currentQuery.d_idstate.origRemote.toStringWithPort(), e.what()); + ioGuard.release(); + conn->release(); + return; } } } @@ -390,21 +400,23 @@ void TCPConnectionToBackend::handleTimeout(const struct timeval& now, bool write ++d_ds->tcpReadTimeouts; } - if (d_ioState) { - d_ioState->reset(); + try { + notifyAllQueriesFailed(now, FailureReason::timeout); + } + catch (const std::exception& e) { + vinfolog("Got an exception while notifying a timeout: %s", e.what()); + } + catch (...) { + vinfolog("Got exception while notifying a timeout"); } - notifyAllQueriesFailed(now, FailureReason::timeout); + release(); } void TCPConnectionToBackend::notifyAllQueriesFailed(const struct timeval& now, FailureReason reason) { d_connectionDied = true; - if (!d_usedForXFR) { - d_ds->outstanding -= d_pendingResponses.size(); - } - auto& clientConn = d_clientConn; if (!clientConn->active()) { // a client timeout occured, or something like that */ @@ -419,22 +431,27 @@ void TCPConnectionToBackend::notifyAllQueriesFailed(const struct timeval& now, F ++clientConn->d_ci.cs->tcpGaveUp; } - if (d_state == State::sendingQueryToBackend) { - clientConn->notifyIOError(clientConn, std::move(d_currentQuery.d_idstate), now); - } + try { + if (d_state == State::sendingQueryToBackend) { + clientConn->notifyIOError(clientConn, std::move(d_currentQuery.d_idstate), now); + } - for (auto& query : d_pendingQueries) { - clientConn->notifyIOError(clientConn, std::move(query.d_idstate), now); - } + for (auto& query : d_pendingQueries) { + clientConn->notifyIOError(clientConn, std::move(query.d_idstate), now); + } - for (auto& response : d_pendingResponses) { - clientConn->notifyIOError(clientConn, std::move(response.second.d_idstate), now); + for (auto& response : d_pendingResponses) { + clientConn->notifyIOError(clientConn, std::move(response.second.d_idstate), now); + } + } + catch (const std::exception& e) { + vinfolog("Got an exception while notifying: %s", e.what()); + } + catch (...) { + vinfolog("Got exception while notifying"); } - d_pendingQueries.clear(); - d_pendingResponses.clear(); - - d_clientConn.reset(); + release(); } IOState TCPConnectionToBackend::handleResponse(std::shared_ptr& conn, const struct timeval& now) @@ -445,11 +462,8 @@ IOState TCPConnectionToBackend::handleResponse(std::shared_ptractive()) { // a client timeout occured, or something like that */ d_connectionDied = true; - d_clientConn.reset(); - if (!conn->d_usedForXFR) { - --conn->d_ds->outstanding; - } + release(); return IOState::Done; } diff --git a/pdns/dnsdistdist/dnsdist-tcp-upstream.hh b/pdns/dnsdistdist/dnsdist-tcp-upstream.hh index 794b151858..a9ba94c1ae 100644 --- a/pdns/dnsdistdist/dnsdist-tcp-upstream.hh +++ b/pdns/dnsdistdist/dnsdist-tcp-upstream.hh @@ -44,6 +44,7 @@ struct ConnectionInfo close(fd); fd = -1; } + if (cs) { --cs->tcpCurrentConnections; } @@ -65,6 +66,8 @@ public: if (getsockname(d_ci.fd, reinterpret_cast(&d_origDest), &socklen)) { d_origDest = d_ci.cs->local; } + /* belongs to the handler now */ + d_ci.fd = -1; d_proxiedDestination = d_origDest; d_proxiedRemote = d_ci.remote; } @@ -159,6 +162,7 @@ public: static void handleXFRResponse(std::shared_ptr& state, const struct timeval& now, TCPResponse&& response); static void handleTimeout(std::shared_ptr& state, bool write); + void terminateClientConnection(); void queueQuery(TCPQuery&& query); bool canAcceptNewQueries() const; @@ -171,7 +175,7 @@ public: std::string toString() const { ostringstream o; - o << "Incoming TCP connection from "<getState()) : "empty")<<", queries count is "<getState()) : "empty")<<", queries count is "< 0 && handler.write(proxyheader.data(), proxyheader.size(), timeout) != proxyheader.size()) { diff --git a/pdns/sstuff.hh b/pdns/sstuff.hh index 8a74451ee4..38d57c85e7 100644 --- a/pdns/sstuff.hh +++ b/pdns/sstuff.hh @@ -351,7 +351,14 @@ public: { return d_socket; } - + + int releaseHandle() + { + int ret = d_socket; + d_socket = -1; + return ret; + } + private: static const size_t s_buflen{4096}; std::string d_buffer; diff --git a/pdns/tcpiohandler.hh b/pdns/tcpiohandler.hh index 3af40566ce..1487ad883a 100644 --- a/pdns/tcpiohandler.hh +++ b/pdns/tcpiohandler.hh @@ -201,19 +201,25 @@ public: close(); } - /* Prepare the connection but does not close the descriptor */ void close() { if (d_conn) { d_conn->close(); d_conn.reset(); } - else if (d_socket != -1) { + + if (d_socket != -1) { shutdown(d_socket, SHUT_RDWR); + ::close(d_socket); d_socket = -1; } } + int getDescriptor() const + { + return d_socket; + } + IOState tryConnect(bool fastOpen, const ComboAddress& remote) { /* yes, this is only the TLS connect not the socket one,