From c5cabe15e4ae707b8360bc19c3043b2a6b6bff7f Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Fri, 28 Mar 2025 15:52:08 +0100 Subject: [PATCH] dnsdist: Add mitigations against misbehaving TCP/TLS clients This commit adds several mitigations against misbehaving TCP/TLS clients: - when a client is near the limit of concurrent TCP connections it is allowed to have, the number of DNS queries over a single TCP connection is restricted to 1 and the idle timeout is reduced to 500 ms - the same restrictions are applied to all connections if the frontend is near the limit of concurrent TCP connections - a limit of 50 read I/O events per query is enforced on incoming TCP connections, to prevent a connection from continuously sending very small packets to keep the worker busy. Clients exceeding this limit can be prevented from opening new TCP connections for a configurable amount of time - three new configurable rates are introduced: new TCP connections per second per client, new TLS sessions per second per client, resumed TLS sessions per second per client. 
Clients exceeding these rates can be prevented from opening new TCP connections for a configurable amount of time --- pdns/dnsdistdist/Makefile.am | 4 +- pdns/dnsdistdist/dnsdist-carbon.cc | 1 + .../dnsdist-concurrent-connections.cc | 329 ++++++++++++++++++ .../dnsdist-concurrent-connections.hh | 46 +-- pdns/dnsdistdist/dnsdist-configuration.hh | 11 + pdns/dnsdistdist/dnsdist-console.cc | 14 +- .../dnsdist-lua-configuration-items.cc | 12 + pdns/dnsdistdist/dnsdist-lua-inspection.cc | 6 +- pdns/dnsdistdist/dnsdist-nghttp2-in.cc | 3 +- ...dist-configuration-yaml-items-generated.cc | 33 ++ .../dnsdist-rust-lib/rust/src/lib.rs | 22 ++ .../dnsdist-settings-definitions.yml | 79 ++++- pdns/dnsdistdist/dnsdist-tcp-upstream.hh | 69 +--- pdns/dnsdistdist/dnsdist-tcp.cc | 131 ++++++- pdns/dnsdistdist/dnsdist-tcp.hh | 1 + pdns/dnsdistdist/dnsdist-web.cc | 3 + pdns/dnsdistdist/dnsdist.hh | 4 +- pdns/dnsdistdist/docs/reference/tuning.rst | 110 +++++- .../docs/reference/yaml-settings.rst | 11 + pdns/dnsdistdist/doh.cc | 6 +- pdns/dnsdistdist/meson.build | 1 + regression-tests.dnsdist/test_TCPLimits.py | 16 +- 22 files changed, 781 insertions(+), 131 deletions(-) create mode 100644 pdns/dnsdistdist/dnsdist-concurrent-connections.cc diff --git a/pdns/dnsdistdist/Makefile.am b/pdns/dnsdistdist/Makefile.am index fe8185fa83..c8aafab7a2 100644 --- a/pdns/dnsdistdist/Makefile.am +++ b/pdns/dnsdistdist/Makefile.am @@ -180,7 +180,7 @@ dnsdist_SOURCES = \ dnsdist-backoff.hh \ dnsdist-cache.cc dnsdist-cache.hh \ dnsdist-carbon.cc dnsdist-carbon.hh \ - dnsdist-concurrent-connections.hh \ + dnsdist-concurrent-connections.cc dnsdist-concurrent-connections.hh \ dnsdist-configuration-yaml-internal.hh \ dnsdist-configuration-yaml.cc dnsdist-configuration-yaml.hh \ dnsdist-configuration.cc dnsdist-configuration.hh \ @@ -316,7 +316,7 @@ testrunner_SOURCES = \ dnsdist-backend.cc dnsdist-backend.hh \ dnsdist-backoff.hh \ dnsdist-cache.cc dnsdist-cache.hh \ - dnsdist-concurrent-connections.hh \ + 
dnsdist-concurrent-connections.cc dnsdist-concurrent-connections.hh \ dnsdist-configuration.cc dnsdist-configuration.hh \ dnsdist-crypto.cc dnsdist-crypto.hh \ dnsdist-dnsparser.cc dnsdist-dnsparser.hh \ diff --git a/pdns/dnsdistdist/dnsdist-carbon.cc b/pdns/dnsdistdist/dnsdist-carbon.cc index 596e0eae10..27c5001ee3 100644 --- a/pdns/dnsdistdist/dnsdist-carbon.cc +++ b/pdns/dnsdistdist/dnsdist-carbon.cc @@ -149,6 +149,7 @@ static bool doOneCarbonExport(const Carbon::Endpoint& endpoint) str << base << "tcpmaxconcurrentconnections" << ' ' << front->tcpMaxConcurrentConnections.load() << " " << now << "\r\n"; str << base << "tcpavgqueriesperconnection" << ' ' << front->tcpAvgQueriesPerConnection.load() << " " << now << "\r\n"; str << base << "tcpavgconnectionduration" << ' ' << front->tcpAvgConnectionDuration.load() << " " << now << "\r\n"; + str << base << "tcpavgreadios" << ' ' << front->tcpAvgIOsPerConnection.load() << " " << now << "\r\n"; str << base << "tls10-queries" << ' ' << front->tls10queries.load() << " " << now << "\r\n"; str << base << "tls11-queries" << ' ' << front->tls11queries.load() << " " << now << "\r\n"; str << base << "tls12-queries" << ' ' << front->tls12queries.load() << " " << now << "\r\n"; diff --git a/pdns/dnsdistdist/dnsdist-concurrent-connections.cc b/pdns/dnsdistdist/dnsdist-concurrent-connections.cc new file mode 100644 index 0000000000..ba191b9d02 --- /dev/null +++ b/pdns/dnsdistdist/dnsdist-concurrent-connections.cc @@ -0,0 +1,329 @@ +/* + * This file is part of PowerDNS or dnsdist. + * Copyright -- PowerDNS.COM B.V. and its contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In addition, for the avoidance of any doubt, permission is granted to + * link this program with OpenSSL and to (re)distribute the binaries + * produced as the result of such linking. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "dnsdist-concurrent-connections.hh" + +#include +#include +#include +#include + +#include + +#include "circular_buffer.hh" +#include "dnsdist-configuration.hh" +#include "dolog.hh" +#include "lock.hh" + +namespace dnsdist +{ + +static constexpr size_t NB_SHARDS = 10; + +struct ClientActivity +{ + uint64_t tcpConnections{0}; + uint64_t tlsNewSessions{0}; /* without resumption */ + uint64_t tlsResumedSessions{0}; + time_t bucketEndTime{0}; +}; + +struct ClientEntry +{ + mutable boost::circular_buffer d_activity; + AddressAndPortRange d_addr; + mutable uint64_t d_concurrentConnections{0}; + mutable time_t d_bannedUntil{0}; + time_t d_lastSeen{0}; +}; + +struct TimeTag +{ +}; +struct AddressTag +{ +}; + +using map_t = boost::multi_index_container< + ClientEntry, + boost::multi_index::indexed_by< + boost::multi_index::hashed_unique, + boost::multi_index::member, AddressAndPortRange::hash>, + boost::multi_index::ordered_non_unique, + boost::multi_index::member>>>; + +static std::vector> s_tcpClientsConnectionMetrics{10}; + +static AddressAndPortRange getRange(const ComboAddress& from) +{ + const auto& immutable = dnsdist::configuration::getImmutableConfiguration(); + return AddressAndPortRange(from, from.isIPv4() ? immutable.d_tcpConnectionsMaskV4 : immutable.d_tcpConnectionsMaskV6, from.isIPv4() ? 
immutable.d_tcpConnectionsMaskV4Port : 0); +} + +static size_t getShardID(const AddressAndPortRange& from) +{ + auto hash = AddressAndPortRange::hash()(from); + return hash % NB_SHARDS; +} + +static bool checkTCPConnectionsRate(const boost::circular_buffer& activity, time_t now, uint64_t maxTCPRate, uint64_t maxTLSNewRate, uint64_t maxTLSResumedRate, uint64_t interval, bool isTLS) +{ + if (maxTCPRate == 0 && (!isTLS || (maxTLSNewRate == 0 && maxTLSResumedRate == 0))) { + return true; + } + uint64_t bucketsConsidered = 0; + uint64_t connectionsSeen = 0; + uint64_t tlsNewSeen = 0; + uint64_t tlsResumedSeen = 0; + time_t cutOff = now - (interval * 60); + for (const auto& entry : activity) { + if (entry.bucketEndTime < cutOff) { + continue; + } + ++bucketsConsidered; + connectionsSeen += entry.tcpConnections; + tlsNewSeen += entry.tlsNewSessions; + tlsResumedSeen += entry.tlsResumedSessions; + } + if (bucketsConsidered == 0) { + return true; + } + if (maxTCPRate > 0) { + auto rate = connectionsSeen / bucketsConsidered; + if (rate > maxTCPRate) { + return false; + } + } + if (maxTLSNewRate > 0 && isTLS) { + auto rate = tlsNewSeen / bucketsConsidered; + if (rate > maxTLSNewRate) { + return false; + } + } + if (maxTLSResumedRate > 0 && isTLS) { + auto rate = tlsResumedSeen / bucketsConsidered; + if (rate > maxTLSResumedRate) { + return false; + } + } + return true; +} + +void IncomingConcurrentTCPConnectionsManager::cleanup(time_t now) +{ + const auto& immutable = dnsdist::configuration::getImmutableConfiguration(); + const auto interval = immutable.d_tcpConnectionsRatePerClientInterval; + time_t cutOff = now - (interval * 60); + for (auto& shard : s_tcpClientsConnectionMetrics) { + auto db = shard.lock(); + auto& index = db->get(); + for (auto entry = index.begin(); entry != index.end();) { + if (entry->d_lastSeen >= cutOff) { + /* this index is ordered on timestamps, + so the first valid entry we see means we are done */ + break; + } + + entry = index.erase(entry); + } 
+ } +} + +static ClientActivity& getCurrentClientActivity(const ClientEntry& entry, time_t now) +{ + auto& activity = entry.d_activity; + if (activity.empty() || activity.front().bucketEndTime < now) { + activity.push_front(ClientActivity{1, 0, 0, now + 60}); + } + return activity.front(); +} + +IncomingConcurrentTCPConnectionsManager::NewConnectionResult IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(const ComboAddress& from, bool isTLS) +{ + const auto& immutable = dnsdist::configuration::getImmutableConfiguration(); + const auto maxConnsPerClient = immutable.d_maxTCPConnectionsPerClient; + const auto threshold = immutable.d_tcpConnectionsOverloadThreshold; + const auto tcpRate = immutable.d_maxTCPConnectionsRatePerClient; + const auto tlsNewRate = immutable.d_maxTLSNewSessionsRatePerClient; + const auto tlsResumedRate = immutable.d_maxTLSResumedSessionsRatePerClient; + const auto interval = immutable.d_tcpConnectionsRatePerClientInterval; + if (maxConnsPerClient == 0 && tcpRate == 0 && tlsResumedRate == 0 && tlsNewRate == 0 && immutable.d_maxTCPReadIOsPerQuery == 0) { + return NewConnectionResult::Allowed; + } + + auto now = time(nullptr); + auto updateActivity = [now](ClientEntry& entry) { + ++entry.d_concurrentConnections; + entry.d_lastSeen = now; + auto& activity = getCurrentClientActivity(entry, now); + ++activity.tcpConnections; + }; + + auto checkConnectionAllowed = [now, from, maxConnsPerClient, threshold, tcpRate, tlsNewRate, tlsResumedRate, interval, isTLS, &immutable](const ClientEntry& entry) { + if (entry.d_bannedUntil != 0 && entry.d_bannedUntil >= now) { + vinfolog("Refusing TCP connection from %s: banned", from.toStringWithPort()); + return NewConnectionResult::Denied; + } + if (maxConnsPerClient > 0 && entry.d_concurrentConnections >= maxConnsPerClient) { + vinfolog("Refusing TCP connection from %s: too many connections", from.toStringWithPort()); + return NewConnectionResult::Denied; + } + if 
(!checkTCPConnectionsRate(entry.d_activity, now, tcpRate, tlsNewRate, tlsResumedRate, interval, isTLS)) { + entry.d_bannedUntil = now + immutable.d_tcpBanDurationForExceedingTCPTLSRate; + vinfolog("Banning TCP connections from %s for %d seconds: too many new TCP/TLS connections per second", from.toStringWithPort(), immutable.d_tcpBanDurationForExceedingTCPTLSRate); + return NewConnectionResult::Denied; + } + + if (maxConnsPerClient == 0 || threshold == 0) { + return NewConnectionResult::Allowed; + } + + auto current = (100 * entry.d_concurrentConnections) / maxConnsPerClient; + if (current < threshold) { + return NewConnectionResult::Allowed; + } + vinfolog("Restricting TCP connection from %s: nearly reaching the maximum number of concurrent TCP connections", from.toStringWithPort()); + return NewConnectionResult::Restricted; + }; + + auto addr = getRange(from); + { + auto shardID = getShardID(addr); + auto db = s_tcpClientsConnectionMetrics.at(shardID).lock(); + const auto& entry = db->find(addr); + if (entry == db->end()) { + ClientEntry newEntry; + newEntry.d_activity.set_capacity(interval); + newEntry.d_addr = addr; + newEntry.d_concurrentConnections = 1; + newEntry.d_lastSeen = now; + db->insert(std::move(newEntry)); + return NewConnectionResult::Allowed; + } + auto result = checkConnectionAllowed(*entry); + if (result != NewConnectionResult::Denied) { + db->modify(entry, updateActivity); + } + return result; + } +} + +bool IncomingConcurrentTCPConnectionsManager::isClientOverThreshold(const ComboAddress& from) +{ + const auto& immutable = dnsdist::configuration::getImmutableConfiguration(); + const auto maxConnsPerClient = immutable.d_maxTCPConnectionsPerClient; + if (maxConnsPerClient == 0 || immutable.d_tcpConnectionsOverloadThreshold == 0) { + return false; + } + + size_t count = 0; + auto addr = getRange(from); + auto shardID = getShardID(addr); + { + auto db = s_tcpClientsConnectionMetrics.at(shardID).lock(); + auto it = db->find(addr); + if (it == 
db->end()) { + return false; + } + count = it->d_concurrentConnections; + } + + auto current = (100 * count) / maxConnsPerClient; + return current >= immutable.d_tcpConnectionsOverloadThreshold; +} + +void IncomingConcurrentTCPConnectionsManager::banClientFor(const ComboAddress& from, time_t now, uint32_t seconds) +{ + auto addr = getRange(from); + auto shardID = getShardID(addr); + { + auto db = s_tcpClientsConnectionMetrics.at(shardID).lock(); + auto it = db->find(addr); + if (it == db->end()) { + return; + } + db->modify(it, [now, seconds](ClientEntry& entry) { + entry.d_lastSeen = now; + entry.d_bannedUntil = now + seconds; + }); + } + vinfolog("Banned TCP client %s for %d seconds", from.toStringWithPort(), seconds); +} + +void IncomingConcurrentTCPConnectionsManager::accountClosedTCPConnection(const ComboAddress& from) +{ + const auto maxConnsPerClient = dnsdist::configuration::getImmutableConfiguration().d_maxTCPConnectionsPerClient; + if (maxConnsPerClient == 0) { + return; + } + auto addr = getRange(from); + auto shardID = getShardID(addr); + { + auto db = s_tcpClientsConnectionMetrics.at(shardID).lock(); + auto it = db->find(addr); + if (it == db->end()) { + return; + } + auto& count = it->d_concurrentConnections; + count--; + } +} + +void IncomingConcurrentTCPConnectionsManager::accountTLSNewSession(const ComboAddress& from) +{ + const auto maxRate = dnsdist::configuration::getImmutableConfiguration().d_maxTLSNewSessionsRatePerClient > 0; + if (maxRate == 0) { + return; + } + auto addr = getRange(from); + auto shardID = getShardID(addr); + { + auto db = s_tcpClientsConnectionMetrics.at(shardID).lock(); + auto it = db->find(addr); + if (it == db->end()) { + return; + } + auto& count = getCurrentClientActivity(*it, time(nullptr)).tlsNewSessions; + count++; + } +} + +void IncomingConcurrentTCPConnectionsManager::accountTLSResumedSession(const ComboAddress& from) +{ + const auto maxRate = 
dnsdist::configuration::getImmutableConfiguration().d_maxTLSResumedSessionsRatePerClient > 0; + if (maxRate == 0) { + return; + } + auto addr = getRange(from); + auto shardID = getShardID(addr); + { + auto db = s_tcpClientsConnectionMetrics.at(shardID).lock(); + auto it = db->find(addr); + if (it == db->end()) { + return; + } + auto& count = getCurrentClientActivity(*it, time(nullptr)).tlsResumedSessions; + count++; + } +} + +} diff --git a/pdns/dnsdistdist/dnsdist-concurrent-connections.hh b/pdns/dnsdistdist/dnsdist-concurrent-connections.hh index 9827bbc33f..a77b2fa3a5 100644 --- a/pdns/dnsdistdist/dnsdist-concurrent-connections.hh +++ b/pdns/dnsdistdist/dnsdist-concurrent-connections.hh @@ -21,47 +21,25 @@ */ #pragma once -#include #include "iputils.hh" -#include "lock.hh" -#include "dnsdist-configuration.hh" namespace dnsdist { class IncomingConcurrentTCPConnectionsManager { public: - static bool accountNewTCPConnection(const ComboAddress& from) + enum class NewConnectionResult : uint8_t { - const auto maxConnsPerClient = dnsdist::configuration::getImmutableConfiguration().d_maxTCPConnectionsPerClient; - if (maxConnsPerClient == 0) { - return true; - } - auto db = s_tcpClientsConcurrentConnectionsCount.lock(); - auto& count = (*db)[from]; - if (count >= maxConnsPerClient) { - return false; - } - ++count; - return true; - } - - static void accountClosedTCPConnection(const ComboAddress& from) - { - const auto maxConnsPerClient = dnsdist::configuration::getImmutableConfiguration().d_maxTCPConnectionsPerClient; - if (maxConnsPerClient == 0) { - return; - } - auto db = s_tcpClientsConcurrentConnectionsCount.lock(); - auto& count = db->at(from); - count--; - if (count == 0) { - db->erase(from); - } - } - -private: - static LockGuarded> s_tcpClientsConcurrentConnectionsCount; + Allowed = 0, + Denied = 1, + Restricted = 2, + }; + static NewConnectionResult accountNewTCPConnection(const ComboAddress& from, bool isTLS); + static bool isClientOverThreshold(const 
ComboAddress& from); + static void accountTLSNewSession(const ComboAddress& from); + static void accountTLSResumedSession(const ComboAddress& from); + static void accountClosedTCPConnection(const ComboAddress& from); + static void banClientFor(const ComboAddress& from, time_t now, uint32_t seconds); + static void cleanup(time_t now); }; - } diff --git a/pdns/dnsdistdist/dnsdist-configuration.hh b/pdns/dnsdistdist/dnsdist-configuration.hh index 435cc0964d..29903199d2 100644 --- a/pdns/dnsdistdist/dnsdist-configuration.hh +++ b/pdns/dnsdistdist/dnsdist-configuration.hh @@ -80,6 +80,10 @@ struct ImmutableConfiguration uint64_t d_outgoingDoHMaxIdlePerBackend{10}; uint64_t d_outgoingTCPMaxIdlePerBackend{10}; uint64_t d_maxTCPClientThreads{10}; + uint64_t d_maxTCPConnectionsRatePerClient{0}; + uint64_t d_maxTLSResumedSessionsRatePerClient{0}; + uint64_t d_maxTLSNewSessionsRatePerClient{0}; + uint64_t d_tcpConnectionsRatePerClientInterval{5}; size_t d_maxTCPConnectionsPerClient{0}; size_t d_udpVectorSize{1}; size_t d_ringsCapacity{10000}; @@ -88,8 +92,15 @@ struct ImmutableConfiguration uint32_t d_socketUDPSendBuffer{0}; uint32_t d_socketUDPRecvBuffer{0}; uint32_t d_hashPerturbation{0}; + uint32_t d_maxTCPReadIOsPerQuery{50}; + uint32_t d_tcpBanDurationForExceedingMaxReadIOsPerQuery{60}; + uint32_t d_tcpBanDurationForExceedingTCPTLSRate{10}; uint16_t d_maxUDPOutstanding{std::numeric_limits::max()}; uint8_t d_udpTimeout{2}; + uint8_t d_tcpConnectionsOverloadThreshold{90}; + uint8_t d_tcpConnectionsMaskV4{32}; + uint8_t d_tcpConnectionsMaskV6{128}; + uint8_t d_tcpConnectionsMaskV4Port{0}; bool d_randomizeUDPSocketsToBackend{false}; bool d_randomizeIDsToBackend{false}; bool d_ringsRecordQueries{true}; diff --git a/pdns/dnsdistdist/dnsdist-console.cc b/pdns/dnsdistdist/dnsdist-console.cc index dcdd436cb7..3f1ac4a08b 100644 --- a/pdns/dnsdistdist/dnsdist-console.cc +++ b/pdns/dnsdistdist/dnsdist-console.cc @@ -706,6 +706,8 @@ static const std::vector s_consoleKeywords{ 
{"setAddEDNSToSelfGeneratedResponses", true, "add", "set whether to add EDNS to self-generated responses, provided that the initial query had EDNS"}, {"setAllowEmptyResponse", true, "allow", "Set to true (defaults to false) to allow empty responses (qdcount=0) with a NoError or NXDomain rcode (default) from backends"}, {"setAPIWritable", true, "bool, dir", "allow modifications via the API. if `dir` is set, it must be a valid directory where the configuration files will be written by the API"}, + {"setBanDurationForExceedingMaxReadIOsPerQuery", true, "n", "Set for how long, in seconds, a client (or range) will be prevented from opening a new TCP connection when it has exceeded the maximum number of read IOs per query over a TCP connection"}, + {"setBanDurationForExceedingTCPTLSRate", true, "n", "Set for how long, in seconds, a client (or range) will be prevented from opening a new TCP connection when it has exceeded the TCP connection or TLS session rates"}, {"setCacheCleaningDelay", true, "num", "Set the interval in seconds between two runs of the cache cleaning algorithm, removing expired entries"}, {"setCacheCleaningPercentage", true, "num", "Set the percentage of the cache that the cache cleaning algorithm will try to free by removing expired entries. By default (100), all expired entries are remove"}, {"setConsistentHashingBalancingFactor", true, "factor", "Set the balancing factor for bounded-load consistent hashing"}, @@ -728,9 +730,13 @@ static const std::vector s_consoleKeywords{ {"setMaxCachedTCPConnectionsPerDownstream", true, "max", "Set the maximum number of inactive TCP connections to a backend cached by each worker TCP thread"}, {"setMaxTCPClientThreads", true, "n", "set the maximum of TCP client threads, handling TCP connections"}, {"setMaxTCPConnectionDuration", true, "n", "set the maximum duration of an incoming TCP connection, in seconds. 
0 means unlimited"}, + {"setMaxTCPConnectionRatePerClient", true, "n", "set the maximum number of new TCP connections that a given client can open per second"}, {"setMaxTCPConnectionsPerClient", true, "n", "set the maximum number of TCP connections per client. 0 means unlimited"}, {"setMaxTCPQueriesPerConnection", true, "n", "set the maximum number of queries in an incoming TCP connection. 0 means unlimited"}, {"setMaxTCPQueuedConnections", true, "n", "set the maximum number of TCP connections queued (waiting to be picked up by a client thread)"}, + {"setMaxTCPReadIOsPerQuery", true, "n", "set the maximum number of read events needed to receive a new query on a TCP connection"}, + {"setMaxTLSNewSessionRatePerClient", true, "n", "set the maximum number of new TLS sessions that a given client can open per second"}, + {"setMaxTLSResumedSessionRatePerClient", true, "n", "set the maximum number of resumed TLS sessions that a given client can open per second"}, {"setMaxUDPOutstanding", true, "n", "set the maximum number of outstanding UDP queries to a given backend server. This can only be set at configuration time and defaults to 65535"}, {"setMetric", true, "name, value", "Set the value of a custom metric to the supplied value"}, {"setPayloadSizeOnSelfGeneratedAnswers", true, "payloadSize", "set the UDP payload size advertised via EDNS on self-generated responses"}, @@ -759,9 +765,15 @@ static const std::vector s_consoleKeywords{ {"setStaleCacheEntriesTTL", true, "n", "allows using cache entries expired for at most n seconds when there is no backend available to answer for a query"}, {"setStructuredLogging", true, "value [, options]", "set whether log messages should be in structured-logging-like format"}, {"setSyslogFacility", true, "facility", "set the syslog logging facility to 'facility'. 
Defaults to LOG_DAEMON"}, + {"setTCPConnectionsMaskV4", true, "n", "Mask to apply to IPv4 addresses when enforcing the TLS connection or TLS sessions rates"}, + {"setTCPConnectionsMaskV4Port", true, "n", "Mask to apply to the port when enforcing the TLS connection or TLS sessions rates for IPv4 addresses"}, + {"setTCPConnectionsMaskV6", true, "n", "Mask to apply to IPv6 addresses when enforcing the TLS connection or TLS sessions rates"}, + {"setTCPConnectionsOverloadThreshold", true, "n", "Set a threshold as a percentage to the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted"}, + {"setTCPConnectionRateInterval", true, "n", "Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed"}, {"setTCPDownstreamCleanupInterval", true, "interval", "minimum interval in seconds between two cleanups of the idle TCP downstream connections"}, - {"setTCPFastOpenKey", true, "string", "TCP Fast Open Key"}, {"setTCPDownstreamMaxIdleTime", true, "time", "Maximum time in seconds that a downstream TCP connection to a backend might stay idle"}, + {"setTCPConnectionsOverloadThreshold", true, "n", "Set a threshold as a percentage to the maximum number of incoming TCP connections per frontend or per client. 
When this threshold is reached, new incoming TCP connections are restricted: only query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds"}, + {"setTCPFastOpenKey", true, "string", "TCP Fast Open Key"}, {"setTCPInternalPipeBufferSize", true, "size", "Set the size in bytes of the internal buffer of the pipes used internally to distribute connections to TCP (and DoT) workers threads"}, {"setTCPRecvTimeout", true, "n", "set the read timeout on TCP connections from the client, in seconds"}, {"setTCPSendTimeout", true, "n", "set the write timeout on TCP connections from the client, in seconds"}, diff --git a/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc b/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc index 6c07c07a10..546f027b20 100644 --- a/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc +++ b/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc @@ -149,6 +149,18 @@ static const std::map s {"setUDPTimeout", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_udpTimeout = newValue; }, std::numeric_limits::max()}}, {"setConsoleMaximumConcurrentConnections", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_consoleMaxConcurrentConnections = newValue; }, std::numeric_limits::max()}}, {"setRingBuffersLockRetries", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_ringsNbLockTries = newValue; }, std::numeric_limits::max()}}, + {"setMaxTCPConnectionRatePerClient", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTCPConnectionsRatePerClient = newValue; }, std::numeric_limits::max()}}, + {"setMaxTLSResumedSessionRatePerClient", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTLSResumedSessionsRatePerClient = 
newValue; }, std::numeric_limits::max()}}, + {"setMaxTLSNewSessionRatePerClient", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTLSNewSessionsRatePerClient = newValue; }, std::numeric_limits::max()}}, + {"setTCPConnectionRateInterval", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsRatePerClientInterval = newValue; }, std::numeric_limits::max()}}, + {"setMaxTCPReadIOsPerQuery", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTCPReadIOsPerQuery = newValue; }, std::numeric_limits::max()}}, + {"setBanDurationForExceedingMaxReadIOsPerQuery", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpBanDurationForExceedingMaxReadIOsPerQuery = newValue; }, std::numeric_limits::max()}}, + {"setBanDurationForExceedingTCPTLSRate", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpBanDurationForExceedingTCPTLSRate = newValue; }, std::numeric_limits::max()}}, + {"setTCPConnectionsOverloadThreshold", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsOverloadThreshold = newValue; }, std::numeric_limits::max()}}, + {"setTCPConnectionsMaskV4", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsMaskV4 = newValue; }, std::numeric_limits::max()}}, + {"setTCPConnectionsMaskV6", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsMaskV6 = newValue; }, std::numeric_limits::max()}}, + {"setTCPConnectionsMaskV4Port", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsMaskV4Port = newValue; }, std::numeric_limits::max()}}, + {"setTCPConnectionsOverloadThreshold", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { 
config.d_tcpConnectionsOverloadThreshold = newValue; }, 100}}, }; static const std::map s_doubleImmutableConfigItems{ diff --git a/pdns/dnsdistdist/dnsdist-lua-inspection.cc b/pdns/dnsdistdist/dnsdist-lua-inspection.cc index 95be35ad37..ceaa4ba917 100644 --- a/pdns/dnsdistdist/dnsdist-lua-inspection.cc +++ b/pdns/dnsdistdist/dnsdist-lua-inspection.cc @@ -738,12 +738,12 @@ void setupLuaInspection(LuaContext& luaCtx) ret << endl; ret << "Frontends:" << endl; - fmt = boost::format("%-3d %-20.20s %-20d %-20d %-20d %-25d %-20d %-20d %-20d %-20f %-20f %-20d %-20d %-25d %-25d %-15d %-15d %-15d %-15d %-15d"); - ret << (fmt % "#" % "Address" % "Connections" % "Max concurrent conn" % "Died reading query" % "Died sending response" % "Gave up" % "Client timeouts" % "Downstream timeouts" % "Avg queries/conn" % "Avg duration" % "TLS new sessions" % "TLS Resumptions" % "TLS unknown ticket keys" % "TLS inactive ticket keys" % "TLS 1.0" % "TLS 1.1" % "TLS 1.2" % "TLS 1.3" % "TLS other") << endl; + fmt = boost::format("%-3d %-20.20s %-20d %-20d %-20d %-25d %-20d %-20d %-20d %-20f %-20f %-20d %-20d %-25d %-25d %-15d %-15d %-15d %-15d %-15d %-15d"); + ret << (fmt % "#" % "Address" % "Connections" % "Max concurrent conn" % "Died reading query" % "Died sending response" % "Gave up" % "Client timeouts" % "Downstream timeouts" % "Avg queries/conn" % "Avg duration" % "Avg read IOs/conn" % "TLS new sessions" % "TLS Resumptions" % "TLS unknown ticket keys" % "TLS inactive ticket keys" % "TLS 1.0" % "TLS 1.1" % "TLS 1.2" % "TLS 1.3" % "TLS other") << endl; size_t counter = 0; for (const auto& frontend : dnsdist::getFrontends()) { - ret << (fmt % counter % frontend->local.toStringWithPort() % frontend->tcpCurrentConnections % frontend->tcpMaxConcurrentConnections % frontend->tcpDiedReadingQuery % frontend->tcpDiedSendingResponse % frontend->tcpGaveUp % frontend->tcpClientTimeouts % frontend->tcpDownstreamTimeouts % frontend->tcpAvgQueriesPerConnection % frontend->tcpAvgConnectionDuration % 
frontend->tlsNewSessions % frontend->tlsResumptions % frontend->tlsUnknownTicketKey % frontend->tlsInactiveTicketKey % frontend->tls10queries % frontend->tls11queries % frontend->tls12queries % frontend->tls13queries % frontend->tlsUnknownqueries) << endl; + ret << (fmt % counter % frontend->local.toStringWithPort() % frontend->tcpCurrentConnections % frontend->tcpMaxConcurrentConnections % frontend->tcpDiedReadingQuery % frontend->tcpDiedSendingResponse % frontend->tcpGaveUp % frontend->tcpClientTimeouts % frontend->tcpDownstreamTimeouts % frontend->tcpAvgQueriesPerConnection % frontend->tcpAvgConnectionDuration % frontend->tcpAvgIOsPerConnection % frontend->tlsNewSessions % frontend->tlsResumptions % frontend->tlsUnknownTicketKey % frontend->tlsInactiveTicketKey % frontend->tls10queries % frontend->tls11queries % frontend->tls12queries % frontend->tls13queries % frontend->tlsUnknownqueries) << endl; ++counter; } ret << endl; diff --git a/pdns/dnsdistdist/dnsdist-nghttp2-in.cc b/pdns/dnsdistdist/dnsdist-nghttp2-in.cc index 8c92b17eea..d7ff12bf4a 100644 --- a/pdns/dnsdistdist/dnsdist-nghttp2-in.cc +++ b/pdns/dnsdistdist/dnsdist-nghttp2-in.cc @@ -286,7 +286,8 @@ bool IncomingHTTP2Connection::checkALPN() void IncomingHTTP2Connection::handleConnectionReady() { constexpr std::array settings{{{NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 100U}}}; - auto ret = nghttp2_submit_settings(d_session.get(), NGHTTP2_FLAG_NONE, settings.data(), settings.size()); + constexpr std::array nearLimitsSettings{{{NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1U}}}; + auto ret = nghttp2_submit_settings(d_session.get(), NGHTTP2_FLAG_NONE, isNearTCPLimits() ? nearLimitsSettings.data() : settings.data(), isNearTCPLimits() ? 
nearLimitsSettings.size() : settings.size()); if (ret != 0) { throw std::runtime_error("Fatal error: " + std::string(nghttp2_strerror(ret))); } diff --git a/pdns/dnsdistdist/dnsdist-rust-lib/dnsdist-configuration-yaml-items-generated.cc b/pdns/dnsdistdist/dnsdist-rust-lib/dnsdist-configuration-yaml-items-generated.cc index ddbea89ed4..1c03496abc 100644 --- a/pdns/dnsdistdist/dnsdist-rust-lib/dnsdist-configuration-yaml-items-generated.cc +++ b/pdns/dnsdistdist/dnsdist-rust-lib/dnsdist-configuration-yaml-items-generated.cc @@ -166,6 +166,39 @@ void convertImmutableFlatSettingsFromRust(const dnsdist::rust::settings::GlobalC if (config.d_maxTCPConnectionsPerClient == 0) { config.d_maxTCPConnectionsPerClient = yamlConfig.tuning.tcp.max_connections_per_client; } + if (config.d_tcpConnectionsOverloadThreshold == 90) { + config.d_tcpConnectionsOverloadThreshold = yamlConfig.tuning.tcp.connections_overload_threshold; + } + if (config.d_maxTCPConnectionsRatePerClient == 0) { + config.d_maxTCPConnectionsRatePerClient = yamlConfig.tuning.tcp.max_connection_rate_per_client; + } + if (config.d_tcpConnectionsRatePerClientInterval == 5) { + config.d_tcpConnectionsRatePerClientInterval = yamlConfig.tuning.tcp.connection_rate_interval; + } + if (config.d_maxTLSNewSessionsRatePerClient == 0) { + config.d_maxTLSNewSessionsRatePerClient = yamlConfig.tuning.tcp.max_tls_new_session_rate_per_client; + } + if (config.d_maxTLSResumedSessionsRatePerClient == 0) { + config.d_maxTLSResumedSessionsRatePerClient = yamlConfig.tuning.tcp.max_tls_resumed_session_rate_per_client; + } + if (config.d_maxTCPReadIOsPerQuery == 50) { + config.d_maxTCPReadIOsPerQuery = yamlConfig.tuning.tcp.max_read_ios_per_query; + } + if (config.d_tcpBanDurationForExceedingMaxReadIOsPerQuery == 60) { + config.d_tcpBanDurationForExceedingMaxReadIOsPerQuery = yamlConfig.tuning.tcp.ban_duration_for_exceeding_max_read_ios_per_query; + } + if (config.d_tcpBanDurationForExceedingTCPTLSRate == 10) { + 
config.d_tcpBanDurationForExceedingTCPTLSRate = yamlConfig.tuning.tcp.ban_duration_for_exceeding_tcp_tls_rate; + } + if (config.d_tcpConnectionsMaskV4 == 32) { + config.d_tcpConnectionsMaskV4 = yamlConfig.tuning.tcp.connections_mask_v4; + } + if (config.d_tcpConnectionsMaskV6 == 128) { + config.d_tcpConnectionsMaskV6 = yamlConfig.tuning.tcp.connections_mask_v6; + } + if (config.d_tcpConnectionsMaskV4Port == 0) { + config.d_tcpConnectionsMaskV4Port = yamlConfig.tuning.tcp.connections_mask_v4_port; + } if (config.d_udpVectorSize == 1) { config.d_udpVectorSize = yamlConfig.tuning.udp.messages_per_round; } diff --git a/pdns/dnsdistdist/dnsdist-rust-lib/rust/src/lib.rs b/pdns/dnsdistdist/dnsdist-rust-lib/rust/src/lib.rs index 9452e1fcdc..1eb879e7ce 100644 --- a/pdns/dnsdistdist/dnsdist-rust-lib/rust/src/lib.rs +++ b/pdns/dnsdistdist/dnsdist-rust-lib/rust/src/lib.rs @@ -1885,6 +1885,28 @@ mod dnsdistsettings { max_connections_per_client: u32, #[serde(default, skip_serializing_if = "crate::is_default")] fast_open_key: String, + #[serde(default = "crate::U8::<90>::value", skip_serializing_if = "crate::U8::<90>::is_equal")] + connections_overload_threshold: u8, + #[serde(default, skip_serializing_if = "crate::is_default")] + max_connection_rate_per_client: u64, + #[serde(default = "crate::U64::<5>::value", skip_serializing_if = "crate::U64::<5>::is_equal")] + connection_rate_interval: u64, + #[serde(default, skip_serializing_if = "crate::is_default")] + max_tls_new_session_rate_per_client: u64, + #[serde(default, skip_serializing_if = "crate::is_default")] + max_tls_resumed_session_rate_per_client: u64, + #[serde(default = "crate::U32::<50>::value", skip_serializing_if = "crate::U32::<50>::is_equal")] + max_read_ios_per_query: u32, + #[serde(default = "crate::U32::<60>::value", skip_serializing_if = "crate::U32::<60>::is_equal")] + ban_duration_for_exceeding_max_read_ios_per_query: u32, + #[serde(default = "crate::U32::<10>::value", skip_serializing_if = 
"crate::U32::<10>::is_equal")] + ban_duration_for_exceeding_tcp_tls_rate: u32, + #[serde(default = "crate::U8::<32>::value", skip_serializing_if = "crate::U8::<32>::is_equal")] + connections_mask_v4: u8, + #[serde(default = "crate::U8::<128>::value", skip_serializing_if = "crate::U8::<128>::is_equal")] + connections_mask_v6: u8, + #[serde(default, skip_serializing_if = "crate::is_default")] + connections_mask_v4_port: u8, } #[derive(Deserialize, Serialize, Debug, PartialEq)] diff --git a/pdns/dnsdistdist/dnsdist-settings-definitions.yml b/pdns/dnsdistdist/dnsdist-settings-definitions.yml index 754ccf0cd8..a1f4104ce5 100644 --- a/pdns/dnsdistdist/dnsdist-settings-definitions.yml +++ b/pdns/dnsdistdist/dnsdist-settings-definitions.yml @@ -676,7 +676,7 @@ dynamic_rules: - name: "mask_port" type: u8 default: "0" - description: "Number of bits of port to consider over IPv4, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. For example passing ``2`` here, which only makes sense if the IPv4 parameter is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``" + description: "Number of bits of the port number to consider over IPv4, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. 
For example passing ``2`` here, which only makes sense if the IPv4 parameter is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``" - name: "exclude_ranges" type: "Vec" default: "" @@ -1515,6 +1515,83 @@ tcp_tuning: default: "" lua-name: "setTCPFastOpenKey" runtime-configurable: false + - name: "connections_overload_threshold" + type: "u8" + default: "90" + lua-name: "setTCPConnectionsOverloadThreshold" + internal-field-name: "d_tcpConnectionsOverloadThreshold" + runtime-configurable: false + description: "Set a threshold as a percentage of the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted: only one query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds" + - name: "max_connection_rate_per_client" + type: "u64" + default: "0" + lua-name: "setMaxTCPConnectionRatePerClient" + internal-field-name: "d_maxTCPConnectionsRatePerClient" + runtime-configurable: false + description: "Set the maximum number of new TCP connections that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. 
See also ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``" + - name: "connection_rate_interval" + type: "u64" + default: "5" + lua-name: "setTCPConnectionRateInterval" + internal-field-name: "d_tcpConnectionsRatePerClientInterval" + runtime-configurable: false + description: "Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed (see ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``)" + - name: "max_tls_new_session_rate_per_client" + type: "u64" + default: "0" + lua-name: "setMaxTLSNewSessionRatePerClient" + internal-field-name: "d_maxTLSNewSessionsRatePerClient" + runtime-configurable: false + description: "Set the maximum number of new TLS sessions, without resumption, that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_connection_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``" + - name: "max_tls_resumed_session_rate_per_client" + type: "u64" + default: "0" + lua-name: "setMaxTLSResumedSessionRatePerClient" + internal-field-name: "d_maxTLSResumedSessionsRatePerClient" + runtime-configurable: false + description: "Set the maximum number of resumed TLS sessions that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. 
See also ``max_connection_rate_per_client`` and ``max_tls_new_session_rate_per_client``" + - name: "max_read_ios_per_query" + type: "u32" + default: "50" + lua-name: "setMaxTCPReadIOsPerQuery" + internal-field-name: "d_maxTCPReadIOsPerQuery" + runtime-configurable: false + description: "Set the maximum number of read events needed to receive a new query on a TCP connection. Usually reading a DNS query over a TCP connection requires two read events, one to read the query size and one to read the query itself. For large queries, on congested networks, a few short reads might occur, increasing the number of read operations needed to read the full query, but if a large number of read events is needed the client might be misbehaving or even actively trying to hurt the server. When this limit is reached, the TCP connection will be terminated and the offending client IP (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection for up to ``ban_duration_for_exceeding_max_read_ios_per_query`` seconds" + - name: "ban_duration_for_exceeding_max_read_ios_per_query" + type: "u32" + default: "60" + lua-name: "setBanDurationForExceedingMaxReadIOsPerQuery" + internal-field-name: "d_tcpBanDurationForExceedingMaxReadIOsPerQuery" + runtime-configurable: false + description: "Set for how long, in seconds, a client (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_read_ios_per_query`` over a TCP connection" + - name: "ban_duration_for_exceeding_tcp_tls_rate" + type: "u32" + default: "10" + lua-name: "setBanDurationForExceedingTCPTLSRate" + internal-field-name: "d_tcpBanDurationForExceedingTCPTLSRate" + runtime-configurable: false + description: "Set for how long, in seconds, a client (or range, see 
``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` or ``max_tls_resumed_session_rate_per_client``" + - name: "connections_mask_v4" + type: "u8" + default: "32" + lua-name: "setTCPConnectionsMaskV4" + internal-field-name: "d_tcpConnectionsMaskV4" + runtime-configurable: false + description: "Mask to apply to IPv4 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. In some scenarios it might make sense to apply these settings to a /28 range rather than a single address, for example" + - name: "connections_mask_v6" + type: "u8" + default: "128" + lua-name: "setTCPConnectionsMaskV6" + internal-field-name: "d_tcpConnectionsMaskV6" + runtime-configurable: false + description: "Mask to apply to IPv6 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. In some scenarios it might make sense to apply these settings to a whole /64 IPv6 range instead of a single address, for example" + - name: "connections_mask_v4_port" + type: u8 + default: "0" + lua-name: "setTCPConnectionsMaskV4Port" + internal-field-name: "d_tcpConnectionsMaskV4Port" + runtime-configurable: false + description: "Number of bits of port to consider when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client`` over IPv4, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. 
For example passing ``2`` here, which only makes sense if ``connections_mask_v4`` is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``" udp_tuning: category: "tuning.udp" diff --git a/pdns/dnsdistdist/dnsdist-tcp-upstream.hh b/pdns/dnsdistdist/dnsdist-tcp-upstream.hh index 984f5d03ad..27497c0cb0 100644 --- a/pdns/dnsdistdist/dnsdist-tcp-upstream.hh +++ b/pdns/dnsdistdist/dnsdist-tcp-upstream.hh @@ -51,69 +51,9 @@ public: void resetForNewQuery(); - boost::optional getClientReadTTD(struct timeval now) const - { - const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration(); - if (runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpRecvTimeout == 0) { - return boost::none; - } - - if (runtimeConfiguration.d_maxTCPConnectionDuration > 0) { - auto elapsed = now.tv_sec - d_connectionStartTime.tv_sec; - if (elapsed < 0 || (static_cast(elapsed) >= runtimeConfiguration.d_maxTCPConnectionDuration)) { - return now; - } - auto remaining = runtimeConfiguration.d_maxTCPConnectionDuration - elapsed; - if (runtimeConfiguration.d_tcpRecvTimeout == 0 || remaining <= static_cast(runtimeConfiguration.d_tcpRecvTimeout)) { - now.tv_sec += remaining; - return now; - } - } - - now.tv_sec += runtimeConfiguration.d_tcpRecvTimeout; - return now; - } - - boost::optional getClientWriteTTD(const struct timeval& now) const - { - const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration(); - if (runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpSendTimeout == 0) { - return boost::none; - } - - timeval res(now); - - if (runtimeConfiguration.d_maxTCPConnectionDuration > 0) { - auto elapsed = res.tv_sec - d_connectionStartTime.tv_sec; - if (elapsed < 0 || static_cast(elapsed) >= runtimeConfiguration.d_maxTCPConnectionDuration) { - return res; - } - auto remaining = 
runtimeConfiguration.d_maxTCPConnectionDuration - elapsed; - if (runtimeConfiguration.d_tcpSendTimeout == 0 || remaining <= static_cast(runtimeConfiguration.d_tcpSendTimeout)) { - res.tv_sec += remaining; - return res; - } - } - - res.tv_sec += runtimeConfiguration.d_tcpSendTimeout; - return res; - } - - bool maxConnectionDurationReached(unsigned int maxConnectionDuration, const struct timeval& now) - { - if (maxConnectionDuration) { - time_t curtime = now.tv_sec; - unsigned int elapsed = 0; - if (curtime > d_connectionStartTime.tv_sec) { // To prevent issues when time goes backward - elapsed = curtime - d_connectionStartTime.tv_sec; - } - if (elapsed >= maxConnectionDuration) { - return true; - } - } - - return false; - } + boost::optional getClientReadTTD(timeval now) const; + boost::optional getClientWriteTTD(const timeval& now) const; + bool maxConnectionDurationReached(unsigned int maxConnectionDuration, const timeval& now) const; std::shared_ptr getDownstreamConnection(std::shared_ptr& backend, const std::unique_ptr>& tlvs, const struct timeval& now); void registerOwnedDownstreamConnection(std::shared_ptr& conn); @@ -186,6 +126,7 @@ public: IOState handleIncomingQueryReceived(const struct timeval& now); void handleExceptionDuringIO(const std::exception& exp); bool readIncomingQuery(const timeval& now, IOState& iostate); + bool isNearTCPLimits() const; enum class State : uint8_t { starting, doingHandshake, readingProxyProtocolHeader, waitingForQuery, readingQuerySize, readingQuery, sendingResponse, idle /* in case of XFR, we stop processing queries */ }; @@ -206,12 +147,14 @@ public: std::unique_ptr d_ioState{nullptr}; std::unique_ptr> d_proxyProtocolValues{nullptr}; TCPClientThreadData& d_threadData; + uint64_t d_readIOsTotal{0}; size_t d_currentPos{0}; size_t d_proxyProtocolNeed{0}; size_t d_queriesCount{0}; size_t d_currentQueriesCount{0}; std::thread::id d_creatorThreadID; uint16_t d_querySize{0}; + uint16_t d_readIOsCurrentQuery{0}; State 
d_state{State::starting}; bool d_isXFR{false}; bool d_proxyProtocolPayloadHasTLV{false}; diff --git a/pdns/dnsdistdist/dnsdist-tcp.cc b/pdns/dnsdistdist/dnsdist-tcp.cc index c0fc71b322..a172d1c11d 100644 --- a/pdns/dnsdistdist/dnsdist-tcp.cc +++ b/pdns/dnsdistdist/dnsdist-tcp.cc @@ -63,8 +63,6 @@ std::atomic g_tcpStatesDumpRequested{0}; -LockGuarded> dnsdist::IncomingConcurrentTCPConnectionsManager::s_tcpClientsConcurrentConnectionsCount; - IncomingTCPConnectionState::~IncomingTCPConnectionState() { dnsdist::IncomingConcurrentTCPConnectionsManager::accountClosedTCPConnection(d_ci.remote); @@ -74,7 +72,7 @@ IncomingTCPConnectionState::~IncomingTCPConnectionState() gettimeofday(&now, nullptr); auto diff = now - d_connectionStartTime; - d_ci.cs->updateTCPMetrics(d_queriesCount, diff.tv_sec * 1000 + diff.tv_usec / 1000); + d_ci.cs->updateTCPMetrics(d_queriesCount, diff.tv_sec * 1000 + diff.tv_usec / 1000, d_queriesCount > 0 ? d_readIOsTotal / d_queriesCount : d_readIOsTotal); } // would have been done when the object is destroyed anyway, @@ -125,6 +123,29 @@ static std::pair, bool> getOwnedDownstre return {nullptr, tlvsMismatch}; } +bool IncomingTCPConnectionState::isNearTCPLimits() const +{ + if (d_ci.d_restricted) { + return true; + } + + const auto tcpConnectionsOverloadThreshold = dnsdist::configuration::getImmutableConfiguration().d_tcpConnectionsOverloadThreshold; + if (tcpConnectionsOverloadThreshold == 0) { + return false; + } + + const auto& clientState = d_ci.cs; + if (clientState->d_tcpConcurrentConnectionsLimit > 0) { + auto concurrentConnections = clientState->tcpCurrentConnections.load(); + auto current = (100 * concurrentConnections) / clientState->d_tcpConcurrentConnectionsLimit; + if (current >= tcpConnectionsOverloadThreshold) { + return true; + } + } + + return dnsdist::IncomingConcurrentTCPConnectionsManager::isClientOverThreshold(d_ci.remote); +} + std::shared_ptr IncomingTCPConnectionState::getDownstreamConnection(std::shared_ptr& backend, const 
std::unique_ptr>& tlvs, const struct timeval& now) { auto [downstream, tlvsMismatch] = getOwnedDownstreamConnection(d_ownedConnectionsToBackend, backend, tlvs); @@ -264,6 +285,12 @@ bool IncomingTCPConnectionState::canAcceptNewQueries(const struct timeval& now) return false; } + if (isNearTCPLimits()) { + d_ci.d_restricted = true; + DEBUGLOG("not accepting new queries because we already near our TCP limits"); + return false; + } + // for DoH, this is already handled by the underlying library if (!d_ci.cs->dohFrontend && d_currentQueriesCount >= d_ci.cs->d_maxInFlightQueriesPerConn) { DEBUGLOG("not accepting new queries because we already have " << d_currentQueriesCount << " out of " << d_ci.cs->d_maxInFlightQueriesPerConn); @@ -290,6 +317,85 @@ void IncomingTCPConnectionState::resetForNewQuery() d_currentPos = 0; d_querySize = 0; d_state = State::waitingForQuery; + d_readIOsTotal += d_readIOsCurrentQuery; + d_readIOsCurrentQuery = 0; +} + +boost::optional IncomingTCPConnectionState::getClientReadTTD(timeval now) const +{ + const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration(); + if (!isNearTCPLimits() && runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpRecvTimeout == 0) { + return boost::none; + } + + size_t maxTCPConnectionDuration = runtimeConfiguration.d_maxTCPConnectionDuration; + uint16_t tcpRecvTimeout = runtimeConfiguration.d_tcpRecvTimeout; + uint32_t tcpRecvTimeoutUsec = 0U; + if (isNearTCPLimits()) { + constexpr size_t maxTCPConnectionDurationNearLimits = 5U; + constexpr uint32_t tcpRecvTimeoutUsecNearLimits = 500U * 1000U; + maxTCPConnectionDuration = runtimeConfiguration.d_maxTCPConnectionDuration != 0 ? 
std::min(runtimeConfiguration.d_maxTCPConnectionDuration, maxTCPConnectionDurationNearLimits) : maxTCPConnectionDurationNearLimits; + tcpRecvTimeout = 0; + tcpRecvTimeoutUsec = tcpRecvTimeoutUsecNearLimits; + } + + if (maxTCPConnectionDuration > 0) { + auto elapsed = now.tv_sec - d_connectionStartTime.tv_sec; + if (elapsed < 0 || (static_cast(elapsed) >= maxTCPConnectionDuration)) { + return now; + } + auto remaining = maxTCPConnectionDuration - elapsed; + if (!isNearTCPLimits() && (runtimeConfiguration.d_tcpRecvTimeout == 0 || remaining <= static_cast(runtimeConfiguration.d_tcpRecvTimeout))) { + now.tv_sec += static_cast(remaining); + return now; + } + } + + now.tv_sec += static_cast(tcpRecvTimeout); + now.tv_usec += tcpRecvTimeoutUsec; + normalizeTV(now); + return now; +} + +boost::optional IncomingTCPConnectionState::getClientWriteTTD(const timeval& now) const +{ + const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration(); + if (runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpSendTimeout == 0) { + return boost::none; + } + + timeval res(now); + + if (runtimeConfiguration.d_maxTCPConnectionDuration > 0) { + auto elapsed = res.tv_sec - d_connectionStartTime.tv_sec; + if (elapsed < 0 || static_cast(elapsed) >= runtimeConfiguration.d_maxTCPConnectionDuration) { + return res; + } + auto remaining = runtimeConfiguration.d_maxTCPConnectionDuration - elapsed; + if (runtimeConfiguration.d_tcpSendTimeout == 0 || remaining <= static_cast(runtimeConfiguration.d_tcpSendTimeout)) { + res.tv_sec += static_cast(remaining); + return res; + } + } + + res.tv_sec += static_cast(runtimeConfiguration.d_tcpSendTimeout); + return res; +} + +bool IncomingTCPConnectionState::maxConnectionDurationReached(unsigned int maxConnectionDuration, const timeval& now) const +{ + if (maxConnectionDuration > 0) { + time_t curtime = now.tv_sec; + unsigned int elapsed = 0; + if (curtime > d_connectionStartTime.tv_sec) { // To prevent 
issues when time goes backward + elapsed = curtime - d_connectionStartTime.tv_sec; + } + if (elapsed >= maxConnectionDuration) { + return true; + } + } + + return false; } void IncomingTCPConnectionState::registerOwnedDownstreamConnection(std::shared_ptr& conn) @@ -892,9 +998,11 @@ void IncomingTCPConnectionState::handleHandshakeDone(const struct timeval& now) if (d_handler.isTLS()) { if (!d_handler.hasTLSSessionBeenResumed()) { ++d_ci.cs->tlsNewSessions; + dnsdist::IncomingConcurrentTCPConnectionsManager::accountTLSNewSession(d_ci.remote); } else { ++d_ci.cs->tlsResumptions; + dnsdist::IncomingConcurrentTCPConnectionsManager::accountTLSResumedSession(d_ci.remote); } if (d_handler.getResumedFromInactiveTicketKey()) { ++d_ci.cs->tlsInactiveTicketKey; @@ -1040,6 +1148,7 @@ bool IncomingTCPConnectionState::readIncomingQuery(const timeval& now, IOState& if (!d_lastIOBlocked && (d_state == State::waitingForQuery || d_state == State::readingQuerySize)) { DEBUGLOG("reading query size"); d_buffer.resize(sizeof(uint16_t)); + d_readIOsCurrentQuery++; iostate = d_handler.tryRead(d_buffer, d_currentPos, sizeof(uint16_t)); if (d_currentPos > 0) { /* if we got at least one byte, we can't go around sending responses */ @@ -1070,6 +1179,7 @@ bool IncomingTCPConnectionState::readIncomingQuery(const timeval& now, IOState& if (!d_lastIOBlocked && d_state == State::readingQuery) { DEBUGLOG("reading query"); + d_readIOsCurrentQuery++; iostate = d_handler.tryRead(d_buffer, d_currentPos, d_querySize); if (iostate == IOState::Done) { iostate = handleIncomingQueryReceived(now); @@ -1101,6 +1211,13 @@ void IncomingTCPConnectionState::handleIO() return; } + const auto& immutable = dnsdist::configuration::getImmutableConfiguration(); + if (d_readIOsCurrentQuery >= immutable.d_maxTCPReadIOsPerQuery) { + vinfolog("Terminating TCP connection from %s for reaching the maximum number of read IO events per query (%d)", d_ci.remote.toStringWithPort(), immutable.d_maxTCPReadIOsPerQuery); + 
dnsdist::IncomingConcurrentTCPConnectionsManager::banClientFor(d_ci.remote, time(nullptr), immutable.d_tcpBanDurationForExceedingMaxReadIOsPerQuery); + return; + } + d_lastIOBlocked = false; try { @@ -1566,6 +1683,7 @@ static void tcpClientThread(pdns::channel::Receiver&& queryRecei try { t_downstreamTCPConnectionsManager.cleanupClosedConnections(now); + dnsdist::IncomingConcurrentTCPConnectionsManager::cleanup(time(nullptr)); if (now.tv_sec > lastTimeoutScan) { lastTimeoutScan = now.tv_sec; @@ -1642,11 +1760,14 @@ static void acceptNewConnection(const TCPAcceptorParam& param, TCPClientThreadDa return; } - if (!dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote)) { - vinfolog("Dropping TCP connection from %s because we have too many from this client already", remote.toStringWithPort()); + auto connectionResult = dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote, connInfo.cs->hasTLS()); + if (connectionResult == dnsdist::IncomingConcurrentTCPConnectionsManager::NewConnectionResult::Denied) { return; } tcpClientCountIncremented = true; + if (connectionResult == dnsdist::IncomingConcurrentTCPConnectionsManager::NewConnectionResult::Restricted) { + connInfo.d_restricted = true; + } vinfolog("Got TCP connection from %s", remote.toStringWithPort()); diff --git a/pdns/dnsdistdist/dnsdist-tcp.hh b/pdns/dnsdistdist/dnsdist-tcp.hh index f3d827ebf3..382f1a6187 100644 --- a/pdns/dnsdistdist/dnsdist-tcp.hh +++ b/pdns/dnsdistdist/dnsdist-tcp.hh @@ -73,6 +73,7 @@ struct ConnectionInfo ComboAddress remote; ClientState* cs{nullptr}; int fd{-1}; + bool d_restricted{false}; }; class InternalQuery diff --git a/pdns/dnsdistdist/dnsdist-web.cc b/pdns/dnsdistdist/dnsdist-web.cc index 4eb57f6be5..b1f91eebd7 100644 --- a/pdns/dnsdistdist/dnsdist-web.cc +++ b/pdns/dnsdistdist/dnsdist-web.cc @@ -688,6 +688,8 @@ static void handlePrometheus(const YaHTTP::Request& req, YaHTTP::Response& resp) output << "# TYPE " << frontsbase << 
"tcpavgqueriesperconnection " << "gauge" << "\n"; output << "# HELP " << frontsbase << "tcpavgconnectionduration " << "The average duration of a TCP connection (ms)" << "\n"; output << "# TYPE " << frontsbase << "tcpavgconnectionduration " << "gauge" << "\n"; + output << "# HELP " << frontsbase << "tcpavgreadios " << "The average number of read IO operations per query over a TCP connection" << "\n"; + output << "# TYPE " << frontsbase << "tcpavgreadios " << "gauge" << "\n"; output << "# HELP " << frontsbase << "tlsqueries " << "Number of queries received by dnsdist over TLS, by TLS version" << "\n"; output << "# TYPE " << frontsbase << "tlsqueries " << "counter" << "\n"; output << "# HELP " << frontsbase << "tlsnewsessions " << "Amount of new TLS sessions negotiated" << "\n"; @@ -734,6 +736,7 @@ static void handlePrometheus(const YaHTTP::Request& req, YaHTTP::Response& resp) output << frontsbase << "tcpmaxconcurrentconnections" << label << front->tcpMaxConcurrentConnections.load() << "\n"; output << frontsbase << "tcpavgqueriesperconnection" << label << front->tcpAvgQueriesPerConnection.load() << "\n"; output << frontsbase << "tcpavgconnectionduration" << label << front->tcpAvgConnectionDuration.load() << "\n"; + output << frontsbase << "tcpavgreadios" << label << front->tcpAvgIOsPerConnection << "\n"; if (front->hasTLS()) { output << frontsbase << "tlsnewsessions" << label << front->tlsNewSessions.load() << "\n"; output << frontsbase << "tlsresumptions" << label << front->tlsResumptions.load() << "\n"; diff --git a/pdns/dnsdistdist/dnsdist.hh b/pdns/dnsdistdist/dnsdist.hh index 20fe358d1f..cf0cc99fb3 100644 --- a/pdns/dnsdistdist/dnsdist.hh +++ b/pdns/dnsdistdist/dnsdist.hh @@ -344,6 +344,7 @@ struct ClientState stat_t tls12queries{0}; // valid DNS queries received via TLSv1.2 stat_t tls13queries{0}; // valid DNS queries received via TLSv1.3 stat_t tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version + pdns::stat_double_t 
tcpAvgIOsPerConnection{0.0}; pdns::stat_double_t tcpAvgQueriesPerConnection{0.0}; /* in ms */ pdns::stat_double_t tcpAvgConnectionDuration{0.0}; @@ -508,10 +509,11 @@ struct ClientState d_filter = bpf; } - void updateTCPMetrics(size_t nbQueries, uint64_t durationMs) + void updateTCPMetrics(size_t nbQueries, uint64_t durationMs, size_t nbIOs) { tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0); tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0); + tcpAvgIOsPerConnection = (99.0 * tcpAvgIOsPerConnection / 100.0) + (nbIOs / 100.0); } }; diff --git a/pdns/dnsdistdist/docs/reference/tuning.rst b/pdns/dnsdistdist/docs/reference/tuning.rst index 1614793ec5..810249edad 100644 --- a/pdns/dnsdistdist/docs/reference/tuning.rst +++ b/pdns/dnsdistdist/docs/reference/tuning.rst @@ -1,6 +1,20 @@ Tuning related functions ======================== +.. function:: setBanDurationForExceedingMaxReadIOsPerQuery(num) + + .. versionadded:: 2.0.0 + + Set for how long, in seconds, a client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded :func:`setMaxTCPReadIOsPerQuery` over a TCP connection. Default is 60 seconds. + +.. function:: setBanDurationForExceedingTCPTLSRate(num) + + .. versionadded:: 2.0.0 + + Set for how long, in seconds, a client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` or :func:`setMaxTLSResumedSessionRatePerClient`. Default is 10 seconds. + + :param int num: Duration of the ban in seconds + .. 
function:: setDoHDownstreamCleanupInterval(interval) .. versionadded:: 1.7.0 @@ -55,19 +69,27 @@ Tuning related functions .. function:: setMaxTCPConnectionDuration(num) - Set the maximum duration of an incoming TCP connection, in seconds. 0 (the default) means unlimited + Set the maximum duration of an incoming TCP connection, in seconds. 0 (the default) means unlimited. :param int num: +.. function:: setMaxTCPConnectionRatePerClient(num) + + .. versionadded:: 2.0.0 + + Set the maximum number of new TCP connections that a given client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) can open, per second, over the last :func:`setTCPConnectionRateInterval` minutes. Clients exceeding this rate will not be able to open new TCP connections for :func:`setBanDurationForExceedingTCPTLSRate` seconds. See also :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`. 0 (the default) means unlimited. + + :param int num: Number of new connections per second + .. function:: setMaxTCPConnectionsPerClient(num) - Set the maximum number of TCP connections per client. 0 (the default) means unlimited + Set the maximum number of TCP connections per client. 0 (the default) means unlimited. :param int num: .. function:: setMaxTCPQueriesPerConnection(num) - Set the maximum number of queries in an incoming TCP connection. 0 (the default) means unlimited + Set the maximum number of queries in an incoming TCP connection. 0 (the default) means unlimited. :param int num: @@ -76,28 +98,52 @@ Tuning related functions .. versionchanged:: 1.6.0 Before 1.6.0 the default value was 1000 on all systems. - Set the maximum number of TCP connections queued (waiting to be picked up by a client thread), defaults to 1000 (10000 on Linux since 1.6.0). 
0 means unlimited + Set the maximum number of TCP connections queued (waiting to be picked up by a client thread), defaults to 1000 (10000 on Linux since 1.6.0). 0 means unlimited. :param int num: +.. function:: setMaxTCPReadIOsPerQuery(num) + + .. versionadded:: 2.0.0 + + Set the maximum number of read events needed to receive a new query on a TCP connection. Usually reading a DNS query over a TCP connection requires two read events, one to read the query size and one to read the query itself. For large queries, on congested networks, a few short reads might occur, increasing the number of read operations needed to read the full query, but if a large number of read events is needed the client might be misbehaving or even actively trying to hurt the server. When this limit is reached, the TCP connection will be terminated and the offending client IP (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) will be prevented from opening a new TCP connection for up to :func:`setBanDurationForExceedingMaxReadIOsPerQuery` seconds. Default is 50. + + :param int num: Number of read IO events per query + .. function:: setMaxUDPOutstanding(num) .. versionchanged:: 1.4.0 Before 1.4.0 the default value was 10240 - Set the maximum number of outstanding UDP queries to a given backend server. This can only be set at configuration time and defaults to 65535 (10240 before 1.4.0) + Set the maximum number of outstanding UDP queries to a given backend server. This can only be set at configuration time and defaults to 65535 (10240 before 1.4.0). :param int num: +.. function:: setMaxTLSNewSessionRatePerClient(num) + + .. 
versionadded:: 2.0.0 + + Set the maximum number of new TLS sessions, without resumption, that a given client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) can open, per second, over the last :func:`setTCPConnectionRateInterval` minutes. Clients exceeding this rate will not be able to open new TCP connections for :func:`setBanDurationForExceedingTCPTLSRate` seconds. See also :func:`setMaxTLSResumedSessionRatePerClient` and :func:`setMaxTCPConnectionRatePerClient`. 0 (the default) means unlimited. + + :param int num: Number of new sessions per second + +.. function:: setMaxTLSResumedSessionRatePerClient(num) + + .. versionadded:: 2.0.0 + + Set the maximum number of resumed TLS sessions that a given client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) can open, per second, over the last :func:`setTCPConnectionRateInterval` minutes. Clients exceeding this rate will not be able to open new TCP connections for :func:`setBanDurationForExceedingTCPTLSRate` seconds. See also :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTCPConnectionRatePerClient`. 0 (the default) means unlimited. + + :param int num: Number of resumed sessions per second + .. function:: setCacheCleaningDelay(num) - Set the interval in seconds between two runs of the cache cleaning algorithm, removing expired entries. Default is every 60s + Set the interval in seconds between two runs of the cache cleaning algorithm, removing expired entries. Default is every 60s. :param int num: .. function:: setCacheCleaningPercentage(num) - Set the percentage of the cache that the cache cleaning algorithm will try to free by removing expired entries. 
By default (100), all expired entries are removed + Set the percentage of the cache that the cache cleaning algorithm will try to free by removing expired entries. By default (100), all expired entries are removed. :param int num: @@ -109,10 +155,52 @@ Tuning related functions .. function:: setStaleCacheEntriesTTL(num) - Allows using cache entries expired for at most n seconds when no backend available to answer for a query + Allows using cache entries expired for at most n seconds when no backend available to answer for a query. :param int num: +.. function:: setTCPConnectionRateInterval(num) + + .. versionadded:: 2.0.0 + + Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed (see :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`). Default is 5. + + :param int num: Interval in minutes + +.. function:: setTCPConnectionsMaskV4(num) + + .. versionadded:: 2.0.0 + + Mask to apply to IPv4 addresses when enforcing :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`. In some scenarios it might make sense to apply these settings to a /28 range rather than a single address, for example. Default is 32. + + :param int num: Number of bits to keep + +.. function:: setTCPConnectionsMaskV4Port(num) + + .. versionadded:: 2.0.0 + + Number of bits of the port number to consider when enforcing :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient` over IPv4 addresses, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. For example passing ``2`` here, which only makes sense if :func:`setTCPConnectionsMaskV4` is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``. 
+ + :param int num: Number of bits to keep + +Number of bits of port to consider when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client`` over IPv4, for CGNAT deployments. + +.. function:: setTCPConnectionsMaskV6(num) + + .. versionadded:: 2.0.0 + + Mask to apply to IPv6 addresses when enforcing :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`. In some scenarios it might make sense to apply these settings to a whole /64 IPv6 range rather than a single address, for example. Default is 128. + + :param int num: Number of bits to keep + +.. function:: setTCPConnectionsOverloadThreshold(num) + + .. versionadded:: 2.0.0 + + Set a threshold as a percentage of the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted: only one query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds. Default is 90. + + :param int num: Threshold in percent + .. function:: setTCPDownstreamCleanupInterval(interval) .. versionadded:: 1.6.0 @@ -168,13 +256,13 @@ Tuning related functions .. function:: setTCPRecvTimeout(num) - Set the read timeout on TCP connections from the client, in seconds. Defaults to 2 + Set the read timeout on TCP connections from the client, in seconds. Defaults to 2. :param int num: .. function:: setTCPSendTimeout(num) - Set the write timeout on TCP connections from the client, in seconds. Defaults to 2 + Set the write timeout on TCP connections from the client, in seconds. Defaults to 2. :param int num: @@ -200,6 +288,6 @@ Tuning related functions .. function:: setUDPTimeout(num) - Set the maximum time dnsdist will wait for a response from a backend over UDP, in seconds. 
Defaults to 2 + Set the maximum time dnsdist will wait for a response from a backend over UDP, in seconds. Defaults to 2. :param int num: diff --git a/pdns/dnsdistdist/docs/reference/yaml-settings.rst b/pdns/dnsdistdist/docs/reference/yaml-settings.rst index ede608b6c1..0687b4f71c 100644 --- a/pdns/dnsdistdist/docs/reference/yaml-settings.rst +++ b/pdns/dnsdistdist/docs/reference/yaml-settings.rst @@ -926,6 +926,17 @@ TcpTuningConfiguration - **outgoing_max_idle_connection_per_backend**: Unsigned integer ``(10)`` - **max_connections_per_client**: Unsigned integer ``(0)`` - **fast_open_key**: String ``("")`` +- **connections_overload_threshold**: Unsigned integer ``(90)`` - Set a threshold as a percentage of the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted: only one query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds +- **max_connection_rate_per_client**: Unsigned integer ``(0)`` - Set the maximum number of new TCP connections that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. 
See also ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client`` +- **connection_rate_interval**: Unsigned integer ``(5)`` - Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed (see ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``) +- **max_tls_new_session_rate_per_client**: Unsigned integer ``(0)`` - Set the maximum number of new TLS sessions, without resumption, that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_connection_rate_per_client`` and ``max_tls_resumed_session_rate_per_client`` +- **max_tls_resumed_session_rate_per_client**: Unsigned integer ``(0)`` - Set the maximum number of resumed TLS sessions that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_connection_rate_per_client`` and ``max_tls_new_session_rate_per_client`` +- **max_read_ios_per_query**: Unsigned integer ``(50)`` - Set the maximum number of read events needed to receive a new query on a TCP connection. Usually reading a DNS query over a TCP connection requires two read events, one to read the query size and one to read the query itself. 
For large queries, on congested networks, a few short reads might occur, increasing the number of read operations needed to read the full query, but if a large number of read events is needed the client might be misbehaving or even actively trying to hurt the server. When this limit is reached, the TCP connection will be terminated and the offending client IP (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection for up to ``ban_duration_for_exceeding_max_read_ios_per_query`` seconds +- **ban_duration_for_exceeding_max_read_ios_per_query**: Unsigned integer ``(60)`` - Set for how long, in seconds, a client (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_read_ios_per_query`` over a TCP connection +- **ban_duration_for_exceeding_tcp_tls_rate**: Unsigned integer ``(10)`` - Set for how long, in seconds, a client (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` or ``max_tls_resumed_session_rate_per_client`` +- **connections_mask_v4**: Unsigned integer ``(32)`` - Mask to apply to IPv4 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. 
In some scenarios it might make sense to apply these settings to a /28 range rather than a single address, for example +- **connections_mask_v6**: Unsigned integer ``(128)`` - Mask to apply to IPv6 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. In some scenarios it might make sense to apply these settings to a whole /64 IPv6 range instead of a single address, for example +- **connections_mask_v4_port**: Unsigned integer ``(0)`` - Number of bits of port to consider when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client`` over IPv4, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. For example passing ``2`` here, which only makes sense if ``connections_mask_v4`` is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535`` .. 
_yaml-settings-TlsEngineConfiguration: diff --git a/pdns/dnsdistdist/doh.cc b/pdns/dnsdistdist/doh.cc index 62f564aaac..2354aaada3 100644 --- a/pdns/dnsdistdist/doh.cc +++ b/pdns/dnsdistdist/doh.cc @@ -327,7 +327,7 @@ static void on_socketclose(void *data) auto diff = now - conn->d_connectionStartTime; conn->d_acceptCtx->decrementConcurrentConnections(); - conn->d_acceptCtx->d_cs->updateTCPMetrics(conn->d_nbQueries, diff.tv_sec * 1000 + diff.tv_usec / 1000); + conn->d_acceptCtx->d_cs->updateTCPMetrics(conn->d_nbQueries, diff.tv_sec * 1000 + diff.tv_usec / 1000, 0); } dnsdist::IncomingConcurrentTCPConnectionsManager::accountClosedTCPConnection(conn->d_remote); @@ -1392,8 +1392,8 @@ static void on_accept(h2o_socket_t *listener, const char *err) return; } - if (!dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote)) { - vinfolog("Dropping DoH connection from %s because we have too many from this client already", remote.toStringWithPort()); + auto connectionResult = dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote, false); + if (connectionResult == dnsdist::IncomingConcurrentTCPConnectionsManager::NewConnectionResult::Denied) { h2o_socket_close(sock); return; } diff --git a/pdns/dnsdistdist/meson.build b/pdns/dnsdistdist/meson.build index 070aff8f00..ab84049642 100644 --- a/pdns/dnsdistdist/meson.build +++ b/pdns/dnsdistdist/meson.build @@ -129,6 +129,7 @@ common_sources += files( src_dir / 'dnsdist-backend.cc', src_dir / 'dnsdist-cache.cc', src_dir / 'dnsdist-carbon.cc', + src_dir / 'dnsdist-concurrent-connections.cc', src_dir / 'dnsdist-configuration.cc', src_dir / 'dnsdist-configuration-yaml.cc', src_dir / 'dnsdist-console.cc', diff --git a/regression-tests.dnsdist/test_TCPLimits.py b/regression-tests.dnsdist/test_TCPLimits.py index 7f24f54ce3..4567b24661 100644 --- a/regression-tests.dnsdist/test_TCPLimits.py +++ b/regression-tests.dnsdist/test_TCPLimits.py @@ -21,11 +21,13 @@ class 
TestTCPLimits(DNSDistTest): _maxTCPConnsPerClient = 3 _maxTCPConnDuration = 5 _config_template = """ - newServer{address="127.0.0.1:%s"} - setTCPRecvTimeout(%s) - setMaxTCPQueriesPerConnection(%s) - setMaxTCPConnectionsPerClient(%s) - setMaxTCPConnectionDuration(%s) + newServer{address="127.0.0.1:%d"} + setTCPRecvTimeout(%d) + setMaxTCPQueriesPerConnection(%d) + setMaxTCPConnectionsPerClient(%d) + setMaxTCPConnectionDuration(%d) + -- disable "near limits" otherwise our tests are broken because connections are forcibly closed + setTCPConnectionsOverloadThreshold(0) """ _config_params = ['_testServerPort', '_tcpIdleTimeout', '_maxTCPQueriesPerConn', '_maxTCPConnsPerClient', '_maxTCPConnDuration'] _verboseMode = True @@ -141,8 +143,10 @@ class TestTCPFrontendLimits(DNSDistTest): _tcpIdleTimeout = 2 _maxTCPConnsPerFrontend = 10 _config_template = """ - newServer{address="127.0.0.1:%s"} + newServer{address="127.0.0.1:%d"} setLocal("%s:%d", {maxConcurrentTCPConnections=%d}) + -- disable "near limits" otherwise our tests are broken because connections are forcibly closed + setTCPConnectionsOverloadThreshold(0) """ _config_params = ['_testServerPort', '_dnsDistListeningAddr', '_dnsDistPort', '_maxTCPConnsPerFrontend'] _verboseMode = True -- 2.47.2