From: Remi Gacogne Date: Tue, 19 Oct 2021 15:15:47 +0000 (+0200) Subject: dnsdist: Better detection of closed TLS downstream connections X-Git-Tag: rec-4.6.0-beta1~28^2~9 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=767f55145ab3b38b311df77b886664d7935f08a5;p=thirdparty%2Fpdns.git dnsdist: Better detection of closed TLS downstream connections --- diff --git a/pdns/dnsdist-console.cc b/pdns/dnsdist-console.cc index 5b27aec4d3..68ea689e75 100644 --- a/pdns/dnsdist-console.cc +++ b/pdns/dnsdist-console.cc @@ -585,6 +585,8 @@ const std::vector g_consoleKeywords{ { "setConsoleMaximumConcurrentConnections", true, "max", "Set the maximum number of concurrent console connections" }, { "setConsoleOutputMaxMsgSize", true, "messageSize", "set console message maximum size in bytes, default is 10 MB" }, { "setDefaultBPFFilter", true, "filter", "When used at configuration time, the corresponding BPFFilter will be attached to every bind" }, + { "setDoHDownstreamCleanupInterval", true, "interval", "minimum interval in seconds between two cleanups of the idle DoH downstream connections" }, + { "setDoHDownstreamMaxIdleTime", true, "time", "Maximum time in seconds that a downstream DoH connection to a backend might stay idle" }, { "setDynBlocksAction", true, "action", "set which action is performed when a query is blocked. Only DNSAction.Drop (the default) and DNSAction.Refused are supported" }, { "setDynBlocksPurgeInterval", true, "sec", "set how often the expired dynamic block entries should be removed" }, { "setDropEmptyQueries", true, "drop", "Whether to drop empty queries right away instead of sending a NOTIMP response" }, @@ -593,6 +595,7 @@ const std::vector g_consoleKeywords{ { "setECSSourcePrefixV6", true, "prefix-length", "the EDNS Client Subnet prefix-length used for IPv6 queries" }, { "setKey", true, "key", "set access key to that key" }, { "setLocal", true, "addr [, {doTCP=true, reusePort=false, tcpFastOpenQueueSize=0, interface=\"\", cpus={}}]", "reset the list of addresses we listen on to this address" }, + { "setMaxCachedDoHConnectionsPerDownstream", true, "max", "Set the maximum number of inactive DoH connections to a backend cached by each worker DoH thread" }, { "setMaxCachedTCPConnectionsPerDownstream", true, "max", "Set the maximum number of inactive TCP connections to a backend cached by each worker TCP thread" }, { "setMaxTCPClientThreads", true, "n", "set the maximum of TCP client threads, handling TCP connections" }, { "setMaxTCPConnectionDuration", true, "n", "set the maximum duration of an incoming TCP connection, in seconds. 0 means unlimited" }, @@ -624,6 +627,7 @@ const std::vector g_consoleKeywords{ { "setStaleCacheEntriesTTL", true, "n", "allows using cache entries expired for at most n seconds when there is no backend available to answer for a query" }, { "setSyslogFacility", true, "facility", "set the syslog logging facility to 'facility'. Defaults to LOG_DAEMON" }, { "setTCPDownstreamCleanupInterval", true, "interval", "minimum interval in seconds between two cleanups of the idle TCP downstream connections" }, + { "setTCPDownstreamMaxIdleTime", true, "time", "Maximum time in seconds that a downstream TCP connection to a backend might stay idle" }, { "setTCPInternalPipeBufferSize", true, "size", "Set the size in bytes of the internal buffer of the pipes used internally to distribute connections to TCP (and DoT) workers threads" }, { "setTCPRecvTimeout", true, "n", "set the read timeout on TCP connections from the client, in seconds" }, { "setTCPSendTimeout", true, "n", "set the write timeout on TCP connections from the client, in seconds" }, diff --git a/pdns/dnsdist-lua.cc b/pdns/dnsdist-lua.cc index 07ee8f97c1..f4e23d9c14 100644 --- a/pdns/dnsdist-lua.cc +++ b/pdns/dnsdist-lua.cc @@ -1297,7 +1297,11 @@ static void setupLuaConfig(LuaContext& luaCtx, bool client, bool configCheck) }); luaCtx.writeFunction("setMaxCachedTCPConnectionsPerDownstream", [](size_t max) { - setMaxCachedTCPConnectionsPerDownstream(max); + DownstreamConnectionsManager::setMaxCachedConnectionsPerDownstream(max); + }); + + luaCtx.writeFunction("setMaxCachedDoHConnectionsPerDownstream", [](size_t max) { + setDoHDownstreamMaxConnectionsPerBackend(max); }); luaCtx.writeFunction("setOutgoingDoHWorkerThreads", [](uint64_t workers) { @@ -2103,6 +2107,21 @@ static void setupLuaConfig(LuaContext& luaCtx, bool client, bool configCheck) DownstreamConnectionsManager::setCleanupInterval(interval); }); + luaCtx.writeFunction("setDoHDownstreamCleanupInterval", [](uint16_t interval) { + setLuaSideEffect(); + setDoHDownstreamCleanupInterval(interval); + }); + + luaCtx.writeFunction("setTCPDownstreamMaxIdeTime", [](uint16_t max) { + setLuaSideEffect(); + DownstreamConnectionsManager::setMaxIdleTime(max); + }); + + luaCtx.writeFunction("setDoHDownstreamMaxIdeTime", [](uint16_t max) { + setLuaSideEffect(); + setDoHDownstreamMaxIdleTime(max); + }); + luaCtx.writeFunction("setConsoleConnectionsLogging", [](bool enabled) { g_logConsoleConnections = enabled; }); diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index 6d54b94943..cc60aefdc3 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -1033,7 +1033,6 @@ struct LocalHolders vector> setupLua(bool client, const std::string& config); void tcpAcceptorThread(ClientState* p); -void setMaxCachedTCPConnectionsPerDownstream(size_t max); #ifdef HAVE_DNS_OVER_HTTPS void dohThread(ClientState* cs); diff --git a/pdns/dnsdistdist/dnsdist-nghttp2.cc b/pdns/dnsdistdist/dnsdist-nghttp2.cc index 03abd4332a..1536c09ec8 100644 --- a/pdns/dnsdistdist/dnsdist-nghttp2.cc +++ b/pdns/dnsdistdist/dnsdist-nghttp2.cc @@ -66,6 +66,7 @@ public: void stopIO(); bool reachedMaxConcurrentQueries() const override; bool reachedMaxStreamID() const override; + bool isIdle() const override; private: static ssize_t send_callback(nghttp2_session* session, const uint8_t* data, size_t length, int flags, void* user_data); @@ -136,11 +137,17 @@ public: s_cleanupInterval = interval; } + static void setMaxIdleTime(uint16_t max) + { + s_maxIdleTime = max; + } + private: static thread_local map>> t_downstreamConnections; static thread_local time_t t_nextCleanup; static size_t s_maxCachedConnectionsPerDownstream; static uint16_t s_cleanupInterval; + static uint16_t s_maxIdleTime; }; uint32_t DoHConnectionToBackend::getConcurrentStreamsCount() const @@ -217,6 +224,11 @@ bool DoHConnectionToBackend::reachedMaxConcurrentQueries() const return false; } +bool DoHConnectionToBackend::isIdle() const +{ + return getConcurrentStreamsCount() == 0; +} + const std::unordered_map DoHConnectionToBackend::s_constants = { {"method-name", ":method"}, {"method-value", "POST"}, @@ -848,6 +860,7 @@ thread_local mapsecond.begin(); connIt != dsIt->second.end();) { @@ -906,7 +921,13 @@ void DownstreamDoHConnectionsManager::cleanupClosedConnections(struct timeval no continue; } - if (isTCPSocketUsable((*connIt)->getHandle())) { + if ((*connIt)->isIdle() && (*connIt)->getLastDataReceivedTime() < idleCutOff) { + /* idle for too long */ + connIt = dsIt->second.erase(connIt); + continue; + } + + if ((*connIt)->isUsable()) { ++connIt; } else { @@ -1326,3 +1347,18 @@ size_t handleH2Timeouts(FDMultiplexer& mplexer, const struct timeval& now) #endif /* HAVE_NGHTTP2 */ return got; } + +void setDoHDownstreamCleanupInterval(uint16_t max) +{ + DownstreamDoHConnectionsManager::setCleanupInterval(max); +} + +void setDoHDownstreamMaxIdleTime(uint16_t max) +{ + DownstreamDoHConnectionsManager::setMaxIdleTime(max); +} + +void setDoHDownstreamMaxConnectionsPerBackend(size_t max) +{ + DownstreamDoHConnectionsManager::setMaxCachedConnectionsPerDownstream(max); +} diff --git a/pdns/dnsdistdist/dnsdist-nghttp2.hh b/pdns/dnsdistdist/dnsdist-nghttp2.hh index a44930f4ca..7b7ed6333d 100644 --- a/pdns/dnsdistdist/dnsdist-nghttp2.hh +++ b/pdns/dnsdistdist/dnsdist-nghttp2.hh @@ -69,3 +69,7 @@ bool setupDoHClientProtocolNegotiation(std::shared_ptr& ctx); bool sendH2Query(const std::shared_ptr& ds, std::unique_ptr& mplexer, std::shared_ptr& sender, InternalQuery&& query, bool healthCheck); size_t handleH2Timeouts(FDMultiplexer& mplexer, const struct timeval& now); size_t clearH2Connections(); + +void setDoHDownstreamCleanupInterval(uint16_t max); +void setDoHDownstreamMaxIdleTime(uint16_t max); +void setDoHDownstreamMaxConnectionsPerBackend(size_t max); diff --git a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc index 3d48e702b2..505415e7e6 100644 --- a/pdns/dnsdistdist/dnsdist-tcp-downstream.cc +++ b/pdns/dnsdistdist/dnsdist-tcp-downstream.cc @@ -447,7 +447,6 @@ void TCPConnectionToBackend::handleIOCallback(int fd, FDMultiplexer::funcparam_t void TCPConnectionToBackend::queueQuery(std::shared_ptr& sender, TCPQuery&& query) { - cerr<<"in "<<__PRETTY_FUNCTION__<<" for a query with a buffer of size "<(*d_mplexer, d_handler->getDescriptor()); } @@ -803,7 +802,7 @@ std::shared_ptr DownstreamConnectionsManager::getConnect return entry; } - if (isTCPSocketUsable(entry->getHandle())) { + if (entry->isUsable()) { ++ds->tcpReusedConnections; return entry; } @@ -835,6 +834,8 @@ void DownstreamConnectionsManager::cleanupClosedTCPConnections(struct timeval no struct timeval freshCutOff = now; freshCutOff.tv_sec -= 1; + struct timeval idleCutOff = now; + idleCutOff.tv_sec -= s_maxIdleTime; for (auto dsIt = t_downstreamConnections.begin(); dsIt != t_downstreamConnections.end(); ) { for (auto connIt = dsIt->second.begin(); connIt != dsIt->second.end(); ) { @@ -849,7 +850,13 @@ void DownstreamConnectionsManager::cleanupClosedTCPConnections(struct timeval no continue; } - if (isTCPSocketUsable((*connIt)->getHandle())) { + if ((*connIt)->isIdle() && (*connIt)->getLastDataReceivedTime() < idleCutOff) { + /* idle for too long */ + connIt = dsIt->second.erase(connIt); + continue; + } + + if ((*connIt)->isUsable()) { ++connIt; } else { @@ -878,12 +885,8 @@ size_t DownstreamConnectionsManager::clear() return count; } -void setMaxCachedTCPConnectionsPerDownstream(size_t max) -{ - DownstreamConnectionsManager::setMaxCachedConnectionsPerDownstream(max); -} - thread_local map>> DownstreamConnectionsManager::t_downstreamConnections; thread_local time_t DownstreamConnectionsManager::t_nextCleanup{0}; size_t DownstreamConnectionsManager::s_maxCachedConnectionsPerDownstream{10}; uint16_t DownstreamConnectionsManager::s_cleanupInterval{60}; +uint16_t DownstreamConnectionsManager::s_maxIdleTime{300}; diff --git a/pdns/dnsdistdist/dnsdist-tcp-downstream.hh b/pdns/dnsdistdist/dnsdist-tcp-downstream.hh index 0ea546b3df..ac9d7e29cf 100644 --- a/pdns/dnsdistdist/dnsdist-tcp-downstream.hh +++ b/pdns/dnsdistdist/dnsdist-tcp-downstream.hh @@ -26,6 +26,16 @@ public: return d_handler->getDescriptor(); } + /* whether the underlying socket has been closed under our feet, basically */ + bool isUsable() const + { + if (!d_handler) { + return false; + } + + return d_handler->isUsable(); + } + const std::shared_ptr& getDS() const { return d_ds; @@ -104,6 +114,7 @@ public: virtual bool reachedMaxStreamID() const = 0; virtual bool reachedMaxConcurrentQueries() const = 0; + virtual bool isIdle() const = 0; virtual void release() { } @@ -214,7 +225,7 @@ public: virtual ~TCPConnectionToBackend(); - bool isIdle() const + bool isIdle() const override { return d_state == State::idle && d_pendingQueries.size() == 0 && d_pendingResponses.size() == 0; } @@ -303,9 +314,15 @@ public: s_cleanupInterval = interval; } + static void setMaxIdleTime(uint16_t max) + { + s_maxIdleTime = max; + } + private: static thread_local map>> t_downstreamConnections; static thread_local time_t t_nextCleanup; static size_t s_maxCachedConnectionsPerDownstream; static uint16_t s_cleanupInterval; + static uint16_t s_maxIdleTime; }; diff --git a/pdns/dnsdistdist/docs/reference/tuning.rst b/pdns/dnsdistdist/docs/reference/tuning.rst index 86f42707fa..5eeb400d4b 100644 --- a/pdns/dnsdistdist/docs/reference/tuning.rst +++ b/pdns/dnsdistdist/docs/reference/tuning.rst @@ -1,6 +1,29 @@ Tuning related functions ======================== +.. function:: setDoHDownstreamCleanupInterval(interval) + + .. versionadded:: 1.7.0 + + Set how often, in seconds, the outgoing DoH connections to backends of a given worker thread are scanned to expunge the ones that are no longer usable. The default is 60 so once per minute and per worker thread. + :param int interval: The interval in seconds. + +.. function:: setDoHDownstreamMaxIdleTime(max) + + .. versionadded:: 1.7.0 + + Set how long, in seconds, an outgoing DoH connection to a backend might stay idle before being closed. The default is 300 so 5 minutes. + + :param int max: The maximum time in seconds. + +.. function:: setMaxCachedDoHConnectionsPerDownstream(max) + + .. versionadded:: 1.7.0 + + Set the maximum number of inactive DoH connections to a backend cached by each DoH worker thread. These connections can be reused when a new query comes in, instead of having to establish a new connection. dnsdist regularly checks whether the other end has closed any cached connection, closing them in that case. + + :param int max: The maximum number of inactive connections to keep. Default is 10, so 10 connections per backend and per DoH worker thread. + .. function:: setMaxCachedTCPConnectionsPerDownstream(max) .. versionadded:: 1.6.0 @@ -84,6 +107,23 @@ Tuning related functions :param int num: +.. function:: setTCPDownstreamCleanupInterval(interval) + + .. versionadded:: 1.6.0 + + Set how often, in seconds, the outgoing TCP connections to backends of a given worker thread are scanned to expunge the ones that are no longer usable. The default is 60 so once per minute and per worker thread. + + :param int interval: The interval in seconds. + +.. function:: setDoHDownstreamMaxIdleTime(max) + + .. versionadded:: 1.7.0 + + Set how long, in seconds, an outgoing DoH connection to a backend might stay idle before being closed. The default is 300 so 5 minutes. + + :param int max: The maximum time in seconds. + + .. function:: setTCPInternalPipeBufferSize(size) .. versionadded:: 1.6.0 diff --git a/pdns/dnsdistdist/test-dnsdistnghttp2_cc.cc b/pdns/dnsdistdist/test-dnsdistnghttp2_cc.cc index 47ab8caa1c..7a75c02378 100644 --- a/pdns/dnsdistdist/test-dnsdistnghttp2_cc.cc +++ b/pdns/dnsdistdist/test-dnsdistnghttp2_cc.cc @@ -410,6 +410,11 @@ public: return false; } + bool isUsable() const override + { + return true; + } + std::string getServerNameIndication() const override { return ""; diff --git a/pdns/dnsdistdist/test-dnsdisttcp_cc.cc b/pdns/dnsdistdist/test-dnsdisttcp_cc.cc index 2a52e73ecd..47560be1ac 100644 --- a/pdns/dnsdistdist/test-dnsdisttcp_cc.cc +++ b/pdns/dnsdistdist/test-dnsdisttcp_cc.cc @@ -222,6 +222,11 @@ public: return false; } + bool isUsable() const override + { + return true; + } + std::string getServerNameIndication() const override { return ""; diff --git a/pdns/tcpiohandler.cc b/pdns/tcpiohandler.cc index 05d7dbeac6..4338f315c8 100644 --- a/pdns/tcpiohandler.cc +++ b/pdns/tcpiohandler.cc @@ -388,6 +388,28 @@ public: return false; } + bool isUsable() const override + { + if (!d_conn) { + return false; + } + + char buf; + int res = SSL_peek(d_conn.get(), &buf, sizeof(buf)); + if (res > 0) { + return true; + } + try { + convertIORequestToIOState(res); + return true; + } + catch (...) { + return false; + } + + return false; + } + void close() override { if (d_conn) { @@ -1314,6 +1336,16 @@ public: return false; } + bool isUsable() const override + { + if (!d_conn) { + return false; + } + + /* as far as I can tell we can't peek so we cannot do better */ + return isTCPSocketUsable(d_socket); + } + std::string getServerNameIndication() const override { if (d_conn) { diff --git a/pdns/tcpiohandler.hh b/pdns/tcpiohandler.hh index ce53f4a65c..7afbef1ffe 100644 --- a/pdns/tcpiohandler.hh +++ b/pdns/tcpiohandler.hh @@ -38,6 +38,7 @@ public: virtual bool hasSessionBeenResumed() const = 0; virtual std::vector> getSessions() = 0; virtual void setSession(std::unique_ptr& session) = 0; + virtual bool isUsable() const = 0; virtual void close() = 0; void setUnknownTicketKey() @@ -530,6 +531,14 @@ public: return d_conn->getSessions(); } + bool isUsable() const + { + if (!d_conn) { + return isTCPSocketUsable(d_socket); + } + return d_conn->isUsable(); + } + private: std::unique_ptr d_conn{nullptr}; ComboAddress d_remote;