dnsdist-backoff.hh \
dnsdist-cache.cc dnsdist-cache.hh \
dnsdist-carbon.cc dnsdist-carbon.hh \
- dnsdist-concurrent-connections.hh \
+ dnsdist-concurrent-connections.cc dnsdist-concurrent-connections.hh \
dnsdist-configuration-yaml-internal.hh \
dnsdist-configuration-yaml.cc dnsdist-configuration-yaml.hh \
dnsdist-configuration.cc dnsdist-configuration.hh \
dnsdist-backend.cc dnsdist-backend.hh \
dnsdist-backoff.hh \
dnsdist-cache.cc dnsdist-cache.hh \
- dnsdist-concurrent-connections.hh \
+ dnsdist-concurrent-connections.cc dnsdist-concurrent-connections.hh \
dnsdist-configuration.cc dnsdist-configuration.hh \
dnsdist-crypto.cc dnsdist-crypto.hh \
dnsdist-dnsparser.cc dnsdist-dnsparser.hh \
str << base << "tcpmaxconcurrentconnections" << ' ' << front->tcpMaxConcurrentConnections.load() << " " << now << "\r\n";
str << base << "tcpavgqueriesperconnection" << ' ' << front->tcpAvgQueriesPerConnection.load() << " " << now << "\r\n";
str << base << "tcpavgconnectionduration" << ' ' << front->tcpAvgConnectionDuration.load() << " " << now << "\r\n";
+ str << base << "tcpavgreadios" << ' ' << front->tcpAvgIOsPerConnection.load() << " " << now << "\r\n";
str << base << "tls10-queries" << ' ' << front->tls10queries.load() << " " << now << "\r\n";
str << base << "tls11-queries" << ' ' << front->tls11queries.load() << " " << now << "\r\n";
str << base << "tls12-queries" << ' ' << front->tls12queries.load() << " " << now << "\r\n";
--- /dev/null
+/*
+ * This file is part of PowerDNS or dnsdist.
+ * Copyright -- PowerDNS.COM B.V. and its contributors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In addition, for the avoidance of any doubt, permission is granted to
+ * link this program with OpenSSL and to (re)distribute the binaries
+ * produced as the result of such linking.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "dnsdist-concurrent-connections.hh"
+
+#include <boost/multi_index_container.hpp>
+#include <boost/multi_index/ordered_index.hpp>
+#include <boost/multi_index/hashed_index.hpp>
+#include <boost/multi_index/key_extractors.hpp>
+
+#include <utility>
+
+#include "circular_buffer.hh"
+#include "dnsdist-configuration.hh"
+#include "dolog.hh"
+#include "lock.hh"
+
+namespace dnsdist
+{
+
+static constexpr size_t NB_SHARDS = 10;
+
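+/* per-client activity is tracked in 60-second buckets: the circular buffer holds one bucket per minute of the configured rate interval */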
+struct ClientActivity
+{
+ uint64_t tcpConnections{0};
+ uint64_t tlsNewSessions{0}; /* without resumption */
+ uint64_t tlsResumedSessions{0};
+ time_t bucketEndTime{0};
+};
+
+struct ClientEntry
+{
+ mutable boost::circular_buffer<ClientActivity> d_activity;
+ AddressAndPortRange d_addr;
+ mutable uint64_t d_concurrentConnections{0};
+ mutable time_t d_bannedUntil{0};
+ time_t d_lastSeen{0};
+};
+
+struct TimeTag
+{
+};
+struct AddressTag
+{
+};
+
+using map_t = boost::multi_index_container<
+ ClientEntry,
+ boost::multi_index::indexed_by<
+ boost::multi_index::hashed_unique<boost::multi_index::tag<AddressTag>,
+ boost::multi_index::member<ClientEntry, AddressAndPortRange, &ClientEntry::d_addr>, AddressAndPortRange::hash>,
+ boost::multi_index::ordered_non_unique<boost::multi_index::tag<TimeTag>,
+ boost::multi_index::member<ClientEntry, time_t, &ClientEntry::d_lastSeen>>>>;
+
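+/* the per-client table is split into NB_SHARDS shards, each guarded by its own lock, to reduce contention between TCP worker threads; a client's shard is selected by hashing its (masked) address */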
+static std::vector<LockGuarded<map_t>> s_tcpClientsConnectionMetrics{NB_SHARDS};
+
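+/* aggregate clients according to the configured IPv4/IPv6 masks (and optional IPv4 port bits), so that for instance a V4 mask of 28 makes 192.0.2.0 through 192.0.2.15 count as a single client */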
+static AddressAndPortRange getRange(const ComboAddress& from)
+{
+ const auto& immutable = dnsdist::configuration::getImmutableConfiguration();
+ return AddressAndPortRange(from, from.isIPv4() ? immutable.d_tcpConnectionsMaskV4 : immutable.d_tcpConnectionsMaskV6, from.isIPv4() ? immutable.d_tcpConnectionsMaskV4Port : 0);
+}
+
+static size_t getShardID(const AddressAndPortRange& from)
+{
+ auto hash = AddressAndPortRange::hash()(from);
+ return hash % NB_SHARDS;
+}
+
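+/* compare the average number of new TCP connections (and, for TLS frontends, new and resumed TLS sessions) per activity bucket over the configured interval against the configured limits, returning true when the client is still within them */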
+static bool checkTCPConnectionsRate(const boost::circular_buffer<ClientActivity>& activity, time_t now, uint64_t maxTCPRate, uint64_t maxTLSNewRate, uint64_t maxTLSResumedRate, uint64_t interval, bool isTLS)
+{
+ if (maxTCPRate == 0 && (!isTLS || (maxTLSNewRate == 0 && maxTLSResumedRate == 0))) {
+ return true;
+ }
+ uint64_t bucketsConsidered = 0;
+ uint64_t connectionsSeen = 0;
+ uint64_t tlsNewSeen = 0;
+ uint64_t tlsResumedSeen = 0;
+ time_t cutOff = now - (interval * 60);
+ for (const auto& entry : activity) {
+ if (entry.bucketEndTime < cutOff) {
+ continue;
+ }
+ ++bucketsConsidered;
+ connectionsSeen += entry.tcpConnections;
+ tlsNewSeen += entry.tlsNewSessions;
+ tlsResumedSeen += entry.tlsResumedSessions;
+ }
+ if (bucketsConsidered == 0) {
+ return true;
+ }
+ if (maxTCPRate > 0) {
+ auto rate = connectionsSeen / bucketsConsidered;
+ if (rate > maxTCPRate) {
+ return false;
+ }
+ }
+ if (maxTLSNewRate > 0 && isTLS) {
+ auto rate = tlsNewSeen / bucketsConsidered;
+ if (rate > maxTLSNewRate) {
+ return false;
+ }
+ }
+ if (maxTLSResumedRate > 0 && isTLS) {
+ auto rate = tlsResumedSeen / bucketsConsidered;
+ if (rate > maxTLSResumedRate) {
+ return false;
+ }
+ }
+ return true;
+}
+
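+/* remove entries that have not been seen for longer than the rate interval; the time-ordered index lets us stop at the first entry that is still recent */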
+void IncomingConcurrentTCPConnectionsManager::cleanup(time_t now)
+{
+ const auto& immutable = dnsdist::configuration::getImmutableConfiguration();
+ const auto interval = immutable.d_tcpConnectionsRatePerClientInterval;
+ time_t cutOff = now - (interval * 60);
+ for (auto& shard : s_tcpClientsConnectionMetrics) {
+ auto db = shard.lock();
+ auto& index = db->get<TimeTag>();
+ for (auto entry = index.begin(); entry != index.end();) {
+ if (entry->d_lastSeen >= cutOff) {
+ /* this index is ordered on timestamps,
+ so the first valid entry we see means we are done */
+ break;
+ }
+
+ entry = index.erase(entry);
+ }
+ }
+}
+
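+/* return the activity bucket covering the current time, opening a fresh 60-second bucket when the most recent one has expired */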
+static ClientActivity& getCurrentClientActivity(const ClientEntry& entry, time_t now)
+{
+ auto& activity = entry.d_activity;
+ if (activity.empty() || activity.front().bucketEndTime < now) {
+ activity.push_front(ClientActivity{0, 0, 0, now + 60});
+ }
+ return activity.front();
+}
+
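+/* account a new incoming TCP connection: Denied when the client is currently banned, has too many concurrent connections or exceeds the TCP/TLS rates, Restricted when it is getting close to the concurrent connections limit, Allowed otherwise */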
+IncomingConcurrentTCPConnectionsManager::NewConnectionResult IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(const ComboAddress& from, bool isTLS)
+{
+ const auto& immutable = dnsdist::configuration::getImmutableConfiguration();
+ const auto maxConnsPerClient = immutable.d_maxTCPConnectionsPerClient;
+ const auto threshold = immutable.d_tcpConnectionsOverloadThreshold;
+ const auto tcpRate = immutable.d_maxTCPConnectionsRatePerClient;
+ const auto tlsNewRate = immutable.d_maxTLSNewSessionsRatePerClient;
+ const auto tlsResumedRate = immutable.d_maxTLSResumedSessionsRatePerClient;
+ const auto interval = immutable.d_tcpConnectionsRatePerClientInterval;
+ if (maxConnsPerClient == 0 && tcpRate == 0 && tlsResumedRate == 0 && tlsNewRate == 0 && immutable.d_maxTCPReadIOsPerQuery == 0) {
+ return NewConnectionResult::Allowed;
+ }
+
+ auto now = time(nullptr);
+ auto updateActivity = [now](ClientEntry& entry) {
+ ++entry.d_concurrentConnections;
+ entry.d_lastSeen = now;
+ auto& activity = getCurrentClientActivity(entry, now);
+ ++activity.tcpConnections;
+ };
+
+ auto checkConnectionAllowed = [now, from, maxConnsPerClient, threshold, tcpRate, tlsNewRate, tlsResumedRate, interval, isTLS, &immutable](const ClientEntry& entry) {
+ if (entry.d_bannedUntil != 0 && entry.d_bannedUntil >= now) {
+ vinfolog("Refusing TCP connection from %s: banned", from.toStringWithPort());
+ return NewConnectionResult::Denied;
+ }
+ if (maxConnsPerClient > 0 && entry.d_concurrentConnections >= maxConnsPerClient) {
+ vinfolog("Refusing TCP connection from %s: too many connections", from.toStringWithPort());
+ return NewConnectionResult::Denied;
+ }
+ if (!checkTCPConnectionsRate(entry.d_activity, now, tcpRate, tlsNewRate, tlsResumedRate, interval, isTLS)) {
+ entry.d_bannedUntil = now + immutable.d_tcpBanDurationForExceedingTCPTLSRate;
+ vinfolog("Banning TCP connections from %s for %d seconds: too many new TCP/TLS connections per second", from.toStringWithPort(), immutable.d_tcpBanDurationForExceedingTCPTLSRate);
+ return NewConnectionResult::Denied;
+ }
+
+ if (maxConnsPerClient == 0 || threshold == 0) {
+ return NewConnectionResult::Allowed;
+ }
+
+ auto current = (100 * entry.d_concurrentConnections) / maxConnsPerClient;
+ if (current < threshold) {
+ return NewConnectionResult::Allowed;
+ }
+ vinfolog("Restricting TCP connection from %s: nearly reaching the maximum number of concurrent TCP connections", from.toStringWithPort());
+ return NewConnectionResult::Restricted;
+ };
+
+ auto addr = getRange(from);
+ {
+ auto shardID = getShardID(addr);
+ auto db = s_tcpClientsConnectionMetrics.at(shardID).lock();
+ const auto& entry = db->find(addr);
+ if (entry == db->end()) {
+ ClientEntry newEntry;
+ newEntry.d_activity.set_capacity(interval);
+ newEntry.d_addr = addr;
+ newEntry.d_concurrentConnections = 1;
+ newEntry.d_lastSeen = now;
+ db->insert(std::move(newEntry));
+ return NewConnectionResult::Allowed;
+ }
+ auto result = checkConnectionAllowed(*entry);
+ if (result != NewConnectionResult::Denied) {
+ db->modify(entry, updateActivity);
+ }
+ return result;
+ }
+}
+
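+/* whether the number of concurrent TCP connections from this client has reached the configured overload threshold, expressed as a percentage of the per-client maximum */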
+bool IncomingConcurrentTCPConnectionsManager::isClientOverThreshold(const ComboAddress& from)
+{
+ const auto& immutable = dnsdist::configuration::getImmutableConfiguration();
+ const auto maxConnsPerClient = immutable.d_maxTCPConnectionsPerClient;
+ if (maxConnsPerClient == 0 || immutable.d_tcpConnectionsOverloadThreshold == 0) {
+ return false;
+ }
+
+ size_t count = 0;
+ auto addr = getRange(from);
+ auto shardID = getShardID(addr);
+ {
+ auto db = s_tcpClientsConnectionMetrics.at(shardID).lock();
+ auto it = db->find(addr);
+ if (it == db->end()) {
+ return false;
+ }
+ count = it->d_concurrentConnections;
+ }
+
+ auto current = (100 * count) / maxConnsPerClient;
+ return current >= immutable.d_tcpConnectionsOverloadThreshold;
+}
+
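+/* prevent this client from opening new TCP connections for the given number of seconds */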
+void IncomingConcurrentTCPConnectionsManager::banClientFor(const ComboAddress& from, time_t now, uint32_t seconds)
+{
+ auto addr = getRange(from);
+ auto shardID = getShardID(addr);
+ {
+ auto db = s_tcpClientsConnectionMetrics.at(shardID).lock();
+ auto it = db->find(addr);
+ if (it == db->end()) {
+ return;
+ }
+ db->modify(it, [now, seconds](ClientEntry& entry) {
+ entry.d_lastSeen = now;
+ entry.d_bannedUntil = now + seconds;
+ });
+ }
+ vinfolog("Banned TCP client %s for %d seconds", from.toStringWithPort(), seconds);
+}
+
+void IncomingConcurrentTCPConnectionsManager::accountClosedTCPConnection(const ComboAddress& from)
+{
+ const auto maxConnsPerClient = dnsdist::configuration::getImmutableConfiguration().d_maxTCPConnectionsPerClient;
+ if (maxConnsPerClient == 0) {
+ return;
+ }
+ auto addr = getRange(from);
+ auto shardID = getShardID(addr);
+ {
+ auto db = s_tcpClientsConnectionMetrics.at(shardID).lock();
+ auto it = db->find(addr);
+ if (it == db->end()) {
+ return;
+ }
+ auto& count = it->d_concurrentConnections;
+ count--;
+ }
+}
+
+void IncomingConcurrentTCPConnectionsManager::accountTLSNewSession(const ComboAddress& from)
+{
+ const auto maxRate = dnsdist::configuration::getImmutableConfiguration().d_maxTLSNewSessionsRatePerClient;
+ if (maxRate == 0) {
+ return;
+ }
+ auto addr = getRange(from);
+ auto shardID = getShardID(addr);
+ {
+ auto db = s_tcpClientsConnectionMetrics.at(shardID).lock();
+ auto it = db->find(addr);
+ if (it == db->end()) {
+ return;
+ }
+ auto& count = getCurrentClientActivity(*it, time(nullptr)).tlsNewSessions;
+ count++;
+ }
+}
+
+void IncomingConcurrentTCPConnectionsManager::accountTLSResumedSession(const ComboAddress& from)
+{
+ const auto maxRate = dnsdist::configuration::getImmutableConfiguration().d_maxTLSResumedSessionsRatePerClient;
+ if (maxRate == 0) {
+ return;
+ }
+ auto addr = getRange(from);
+ auto shardID = getShardID(addr);
+ {
+ auto db = s_tcpClientsConnectionMetrics.at(shardID).lock();
+ auto it = db->find(addr);
+ if (it == db->end()) {
+ return;
+ }
+ auto& count = getCurrentClientActivity(*it, time(nullptr)).tlsResumedSessions;
+ count++;
+ }
+}
+
+}
*/
#pragma once
-#include <map>
#include "iputils.hh"
-#include "lock.hh"
-#include "dnsdist-configuration.hh"
namespace dnsdist
{
class IncomingConcurrentTCPConnectionsManager
{
public:
- static bool accountNewTCPConnection(const ComboAddress& from)
+ enum class NewConnectionResult : uint8_t
{
- const auto maxConnsPerClient = dnsdist::configuration::getImmutableConfiguration().d_maxTCPConnectionsPerClient;
- if (maxConnsPerClient == 0) {
- return true;
- }
- auto db = s_tcpClientsConcurrentConnectionsCount.lock();
- auto& count = (*db)[from];
- if (count >= maxConnsPerClient) {
- return false;
- }
- ++count;
- return true;
- }
-
- static void accountClosedTCPConnection(const ComboAddress& from)
- {
- const auto maxConnsPerClient = dnsdist::configuration::getImmutableConfiguration().d_maxTCPConnectionsPerClient;
- if (maxConnsPerClient == 0) {
- return;
- }
- auto db = s_tcpClientsConcurrentConnectionsCount.lock();
- auto& count = db->at(from);
- count--;
- if (count == 0) {
- db->erase(from);
- }
- }
-
-private:
- static LockGuarded<std::map<ComboAddress, size_t, ComboAddress::addressOnlyLessThan>> s_tcpClientsConcurrentConnectionsCount;
+ Allowed = 0,
+ Denied = 1,
+ Restricted = 2,
+ };
+ static NewConnectionResult accountNewTCPConnection(const ComboAddress& from, bool isTLS);
+ static bool isClientOverThreshold(const ComboAddress& from);
+ static void accountTLSNewSession(const ComboAddress& from);
+ static void accountTLSResumedSession(const ComboAddress& from);
+ static void accountClosedTCPConnection(const ComboAddress& from);
+ static void banClientFor(const ComboAddress& from, time_t now, uint32_t seconds);
+ static void cleanup(time_t now);
};
-
}
uint64_t d_outgoingDoHMaxIdlePerBackend{10};
uint64_t d_outgoingTCPMaxIdlePerBackend{10};
uint64_t d_maxTCPClientThreads{10};
+ uint64_t d_maxTCPConnectionsRatePerClient{0};
+ uint64_t d_maxTLSResumedSessionsRatePerClient{0};
+ uint64_t d_maxTLSNewSessionsRatePerClient{0};
+ uint64_t d_tcpConnectionsRatePerClientInterval{5};
size_t d_maxTCPConnectionsPerClient{0};
size_t d_udpVectorSize{1};
size_t d_ringsCapacity{10000};
uint32_t d_socketUDPSendBuffer{0};
uint32_t d_socketUDPRecvBuffer{0};
uint32_t d_hashPerturbation{0};
+ uint32_t d_maxTCPReadIOsPerQuery{50};
+ uint32_t d_tcpBanDurationForExceedingMaxReadIOsPerQuery{60};
+ uint32_t d_tcpBanDurationForExceedingTCPTLSRate{10};
uint16_t d_maxUDPOutstanding{std::numeric_limits<uint16_t>::max()};
uint8_t d_udpTimeout{2};
+ uint8_t d_tcpConnectionsOverloadThreshold{90};
+ uint8_t d_tcpConnectionsMaskV4{32};
+ uint8_t d_tcpConnectionsMaskV6{128};
+ uint8_t d_tcpConnectionsMaskV4Port{0};
bool d_randomizeUDPSocketsToBackend{false};
bool d_randomizeIDsToBackend{false};
bool d_ringsRecordQueries{true};
{"setAddEDNSToSelfGeneratedResponses", true, "add", "set whether to add EDNS to self-generated responses, provided that the initial query had EDNS"},
{"setAllowEmptyResponse", true, "allow", "Set to true (defaults to false) to allow empty responses (qdcount=0) with a NoError or NXDomain rcode (default) from backends"},
{"setAPIWritable", true, "bool, dir", "allow modifications via the API. if `dir` is set, it must be a valid directory where the configuration files will be written by the API"},
+ {"setBanDurationForExceedingMaxReadIOsPerQuery", true, "n", "Set for how long, in seconds, a client (or range) will be prevented from opening a new TCP connection when it has exceeded the maximum number of read IOs per query over a TCP connection"},
+ {"setBanDurationForExceedingTCPTLSRate", true, "n", "Set for how long, in seconds, a client (or range) will be prevented from opening a new TCP connection when it has exceeded the TCP connection or TLS session rates"},
{"setCacheCleaningDelay", true, "num", "Set the interval in seconds between two runs of the cache cleaning algorithm, removing expired entries"},
{"setCacheCleaningPercentage", true, "num", "Set the percentage of the cache that the cache cleaning algorithm will try to free by removing expired entries. By default (100), all expired entries are remove"},
{"setConsistentHashingBalancingFactor", true, "factor", "Set the balancing factor for bounded-load consistent hashing"},
{"setMaxCachedTCPConnectionsPerDownstream", true, "max", "Set the maximum number of inactive TCP connections to a backend cached by each worker TCP thread"},
{"setMaxTCPClientThreads", true, "n", "set the maximum of TCP client threads, handling TCP connections"},
{"setMaxTCPConnectionDuration", true, "n", "set the maximum duration of an incoming TCP connection, in seconds. 0 means unlimited"},
+ {"setMaxTCPConnectionRatePerClient", true, "n", "set the maximum number of new TCP connections that a given client can open per second"},
{"setMaxTCPConnectionsPerClient", true, "n", "set the maximum number of TCP connections per client. 0 means unlimited"},
{"setMaxTCPQueriesPerConnection", true, "n", "set the maximum number of queries in an incoming TCP connection. 0 means unlimited"},
{"setMaxTCPQueuedConnections", true, "n", "set the maximum number of TCP connections queued (waiting to be picked up by a client thread)"},
+ {"setMaxTCPReadIOsPerQuery", true, "n", "set the maximum number of read events needed to receive a new query on a TCP connection"},
+ {"setMaxTLSNewSessionRatePerClient", true, "n", "set the maximum number of new TLS sessions that a given client can open per second"},
+ {"setMaxTLSResumedSessionRatePerClient", true, "n", "set the maximum number of resumed TLS sessions that a given client can open per second"},
{"setMaxUDPOutstanding", true, "n", "set the maximum number of outstanding UDP queries to a given backend server. This can only be set at configuration time and defaults to 65535"},
{"setMetric", true, "name, value", "Set the value of a custom metric to the supplied value"},
{"setPayloadSizeOnSelfGeneratedAnswers", true, "payloadSize", "set the UDP payload size advertised via EDNS on self-generated responses"},
{"setStaleCacheEntriesTTL", true, "n", "allows using cache entries expired for at most n seconds when there is no backend available to answer for a query"},
{"setStructuredLogging", true, "value [, options]", "set whether log messages should be in structured-logging-like format"},
{"setSyslogFacility", true, "facility", "set the syslog logging facility to 'facility'. Defaults to LOG_DAEMON"},
+ {"setTCPConnectionsMaskV4", true, "n", "Mask to apply to IPv4 addresses when enforcing the TLS connection or TLS sessions rates"},
+ {"setTCPConnectionsMaskV4Port", true, "n", "Mask to apply to the port when enforcing the TLS connection or TLS sessions rates for IPv4 addresses"},
+ {"setTCPConnectionsMaskV6", true, "n", "Mask to apply to IPv6 addresses when enforcing the TLS connection or TLS sessions rates"},
+ {"setTCPConnectionsOverloadThreshold", true, "n", "Set a threshold as a percentage to the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted"},
+ {"setTCPConnectionRateInterval", true, "n", "Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed"},
{"setTCPDownstreamCleanupInterval", true, "interval", "minimum interval in seconds between two cleanups of the idle TCP downstream connections"},
- {"setTCPFastOpenKey", true, "string", "TCP Fast Open Key"},
{"setTCPDownstreamMaxIdleTime", true, "time", "Maximum time in seconds that a downstream TCP connection to a backend might stay idle"},
+ {"setTCPConnectionsOverloadThreshold", true, "n", "Set a threshold as a percentage to the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted: only query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds"},
+ {"setTCPFastOpenKey", true, "string", "TCP Fast Open Key"},
{"setTCPInternalPipeBufferSize", true, "size", "Set the size in bytes of the internal buffer of the pipes used internally to distribute connections to TCP (and DoT) workers threads"},
{"setTCPRecvTimeout", true, "n", "set the read timeout on TCP connections from the client, in seconds"},
{"setTCPSendTimeout", true, "n", "set the write timeout on TCP connections from the client, in seconds"},
{"setUDPTimeout", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_udpTimeout = newValue; }, std::numeric_limits<uint8_t>::max()}},
{"setConsoleMaximumConcurrentConnections", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_consoleMaxConcurrentConnections = newValue; }, std::numeric_limits<uint32_t>::max()}},
{"setRingBuffersLockRetries", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_ringsNbLockTries = newValue; }, std::numeric_limits<uint64_t>::max()}},
+ {"setMaxTCPConnectionRatePerClient", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTCPConnectionsRatePerClient = newValue; }, std::numeric_limits<uint64_t>::max()}},
+ {"setMaxTLSResumedSessionRatePerClient", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTLSResumedSessionsRatePerClient = newValue; }, std::numeric_limits<uint64_t>::max()}},
+ {"setMaxTLSNewSessionRatePerClient", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTLSNewSessionsRatePerClient = newValue; }, std::numeric_limits<uint64_t>::max()}},
+ {"setTCPConnectionRateInterval", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsRatePerClientInterval = newValue; }, std::numeric_limits<uint64_t>::max()}},
+ {"setMaxTCPReadIOsPerQuery", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_maxTCPReadIOsPerQuery = newValue; }, std::numeric_limits<uint32_t>::max()}},
+ {"setBanDurationForExceedingMaxReadIOsPerQuery", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpBanDurationForExceedingMaxReadIOsPerQuery = newValue; }, std::numeric_limits<uint32_t>::max()}},
+ {"setBanDurationForExceedingTCPTLSRate", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpBanDurationForExceedingTCPTLSRate = newValue; }, std::numeric_limits<uint32_t>::max()}},
+ {"setTCPConnectionsOverloadThreshold", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsOverloadThreshold = newValue; }, std::numeric_limits<uint8_t>::max()}},
+ {"setTCPConnectionsMaskV4", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsMaskV4 = newValue; }, std::numeric_limits<uint8_t>::max()}},
+ {"setTCPConnectionsMaskV6", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsMaskV6 = newValue; }, std::numeric_limits<uint8_t>::max()}},
+ {"setTCPConnectionsMaskV4Port", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsMaskV4Port = newValue; }, std::numeric_limits<uint8_t>::max()}},
+ {"setTCPConnectionsOverloadThreshold", {[](dnsdist::configuration::ImmutableConfiguration& config, uint64_t newValue) { config.d_tcpConnectionsOverloadThreshold = newValue; }, 100}},
};
static const std::map<std::string, DoubleImmutableConfigurationItems> s_doubleImmutableConfigItems{
ret << endl;
ret << "Frontends:" << endl;
- fmt = boost::format("%-3d %-20.20s %-20d %-20d %-20d %-25d %-20d %-20d %-20d %-20f %-20f %-20d %-20d %-25d %-25d %-15d %-15d %-15d %-15d %-15d");
- ret << (fmt % "#" % "Address" % "Connections" % "Max concurrent conn" % "Died reading query" % "Died sending response" % "Gave up" % "Client timeouts" % "Downstream timeouts" % "Avg queries/conn" % "Avg duration" % "TLS new sessions" % "TLS Resumptions" % "TLS unknown ticket keys" % "TLS inactive ticket keys" % "TLS 1.0" % "TLS 1.1" % "TLS 1.2" % "TLS 1.3" % "TLS other") << endl;
+ fmt = boost::format("%-3d %-20.20s %-20d %-20d %-20d %-25d %-20d %-20d %-20d %-20f %-20f %-20f %-20d %-20d %-25d %-25d %-15d %-15d %-15d %-15d %-15d");
+ ret << (fmt % "#" % "Address" % "Connections" % "Max concurrent conn" % "Died reading query" % "Died sending response" % "Gave up" % "Client timeouts" % "Downstream timeouts" % "Avg queries/conn" % "Avg duration" % "Avg read IOs/conn" % "TLS new sessions" % "TLS Resumptions" % "TLS unknown ticket keys" % "TLS inactive ticket keys" % "TLS 1.0" % "TLS 1.1" % "TLS 1.2" % "TLS 1.3" % "TLS other") << endl;
size_t counter = 0;
for (const auto& frontend : dnsdist::getFrontends()) {
- ret << (fmt % counter % frontend->local.toStringWithPort() % frontend->tcpCurrentConnections % frontend->tcpMaxConcurrentConnections % frontend->tcpDiedReadingQuery % frontend->tcpDiedSendingResponse % frontend->tcpGaveUp % frontend->tcpClientTimeouts % frontend->tcpDownstreamTimeouts % frontend->tcpAvgQueriesPerConnection % frontend->tcpAvgConnectionDuration % frontend->tlsNewSessions % frontend->tlsResumptions % frontend->tlsUnknownTicketKey % frontend->tlsInactiveTicketKey % frontend->tls10queries % frontend->tls11queries % frontend->tls12queries % frontend->tls13queries % frontend->tlsUnknownqueries) << endl;
+ ret << (fmt % counter % frontend->local.toStringWithPort() % frontend->tcpCurrentConnections % frontend->tcpMaxConcurrentConnections % frontend->tcpDiedReadingQuery % frontend->tcpDiedSendingResponse % frontend->tcpGaveUp % frontend->tcpClientTimeouts % frontend->tcpDownstreamTimeouts % frontend->tcpAvgQueriesPerConnection % frontend->tcpAvgConnectionDuration % frontend->tcpAvgIOsPerConnection % frontend->tlsNewSessions % frontend->tlsResumptions % frontend->tlsUnknownTicketKey % frontend->tlsInactiveTicketKey % frontend->tls10queries % frontend->tls11queries % frontend->tls12queries % frontend->tls13queries % frontend->tlsUnknownqueries) << endl;
++counter;
}
ret << endl;
void IncomingHTTP2Connection::handleConnectionReady()
{
constexpr std::array<nghttp2_settings_entry, 1> settings{{{NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 100U}}};
- auto ret = nghttp2_submit_settings(d_session.get(), NGHTTP2_FLAG_NONE, settings.data(), settings.size());
+ constexpr std::array<nghttp2_settings_entry, 1> nearLimitsSettings{{{NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, 1U}}};
+ auto ret = nghttp2_submit_settings(d_session.get(), NGHTTP2_FLAG_NONE, isNearTCPLimits() ? nearLimitsSettings.data() : settings.data(), isNearTCPLimits() ? nearLimitsSettings.size() : settings.size());
if (ret != 0) {
throw std::runtime_error("Fatal error: " + std::string(nghttp2_strerror(ret)));
}
if (config.d_maxTCPConnectionsPerClient == 0) {
config.d_maxTCPConnectionsPerClient = yamlConfig.tuning.tcp.max_connections_per_client;
}
+ if (config.d_tcpConnectionsOverloadThreshold == 90) {
+ config.d_tcpConnectionsOverloadThreshold = yamlConfig.tuning.tcp.connections_overload_threshold;
+ }
+ if (config.d_maxTCPConnectionsRatePerClient == 0) {
+ config.d_maxTCPConnectionsRatePerClient = yamlConfig.tuning.tcp.max_connection_rate_per_client;
+ }
+ if (config.d_tcpConnectionsRatePerClientInterval == 5) {
+ config.d_tcpConnectionsRatePerClientInterval = yamlConfig.tuning.tcp.connection_rate_interval;
+ }
+ if (config.d_maxTLSNewSessionsRatePerClient == 0) {
+ config.d_maxTLSNewSessionsRatePerClient = yamlConfig.tuning.tcp.max_tls_new_session_rate_per_client;
+ }
+ if (config.d_maxTLSResumedSessionsRatePerClient == 0) {
+ config.d_maxTLSResumedSessionsRatePerClient = yamlConfig.tuning.tcp.max_tls_resumed_session_rate_per_client;
+ }
+ if (config.d_maxTCPReadIOsPerQuery == 50) {
+ config.d_maxTCPReadIOsPerQuery = yamlConfig.tuning.tcp.max_read_ios_per_query;
+ }
+ if (config.d_tcpBanDurationForExceedingMaxReadIOsPerQuery == 60) {
+ config.d_tcpBanDurationForExceedingMaxReadIOsPerQuery = yamlConfig.tuning.tcp.ban_duration_for_exceeding_max_read_ios_per_query;
+ }
+ if (config.d_tcpBanDurationForExceedingTCPTLSRate == 10) {
+ config.d_tcpBanDurationForExceedingTCPTLSRate = yamlConfig.tuning.tcp.ban_duration_for_exceeding_tcp_tls_rate;
+ }
+ if (config.d_tcpConnectionsMaskV4 == 32) {
+ config.d_tcpConnectionsMaskV4 = yamlConfig.tuning.tcp.connections_mask_v4;
+ }
+ if (config.d_tcpConnectionsMaskV6 == 128) {
+ config.d_tcpConnectionsMaskV6 = yamlConfig.tuning.tcp.connections_mask_v6;
+ }
+ if (config.d_tcpConnectionsMaskV4Port == 0) {
+ config.d_tcpConnectionsMaskV4Port = yamlConfig.tuning.tcp.connections_mask_v4_port;
+ }
if (config.d_udpVectorSize == 1) {
config.d_udpVectorSize = yamlConfig.tuning.udp.messages_per_round;
}
max_connections_per_client: u32,
#[serde(default, skip_serializing_if = "crate::is_default")]
fast_open_key: String,
+ #[serde(default = "crate::U8::<90>::value", skip_serializing_if = "crate::U8::<90>::is_equal")]
+ connections_overload_threshold: u8,
+ #[serde(default, skip_serializing_if = "crate::is_default")]
+ max_connection_rate_per_client: u64,
+ #[serde(default = "crate::U64::<5>::value", skip_serializing_if = "crate::U64::<5>::is_equal")]
+ connection_rate_interval: u64,
+ #[serde(default, skip_serializing_if = "crate::is_default")]
+ max_tls_new_session_rate_per_client: u64,
+ #[serde(default, skip_serializing_if = "crate::is_default")]
+ max_tls_resumed_session_rate_per_client: u64,
+ #[serde(default = "crate::U32::<50>::value", skip_serializing_if = "crate::U32::<50>::is_equal")]
+ max_read_ios_per_query: u32,
+ #[serde(default = "crate::U32::<60>::value", skip_serializing_if = "crate::U32::<60>::is_equal")]
+ ban_duration_for_exceeding_max_read_ios_per_query: u32,
+ #[serde(default = "crate::U32::<10>::value", skip_serializing_if = "crate::U32::<10>::is_equal")]
+ ban_duration_for_exceeding_tcp_tls_rate: u32,
+ #[serde(default = "crate::U8::<32>::value", skip_serializing_if = "crate::U8::<32>::is_equal")]
+ connections_mask_v4: u8,
+ #[serde(default = "crate::U8::<128>::value", skip_serializing_if = "crate::U8::<128>::is_equal")]
+ connections_mask_v6: u8,
+ #[serde(default, skip_serializing_if = "crate::is_default")]
+ connections_mask_v4_port: u8,
}
#[derive(Deserialize, Serialize, Debug, PartialEq)]
- name: "mask_port"
type: u8
default: "0"
- description: "Number of bits of port to consider over IPv4, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. For example passing ``2`` here, which only makes sense if the IPv4 parameter is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``"
+ description: "Number of bits of the port number to consider over IPv4, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. For example passing ``2`` here, which only makes sense if the IPv4 parameter is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``"
- name: "exclude_ranges"
type: "Vec<String>"
default: ""
default: ""
lua-name: "setTCPFastOpenKey"
runtime-configurable: false
+ - name: "connections_overload_threshold"
+ type: "u8"
+ default: "90"
+ lua-name: "setTCPConnectionsOverloadThreshold"
+ internal-field-name: "d_tcpConnectionsOverloadThreshold"
+ runtime-configurable: false
+ description: "Set a threshold as a percentage to the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted: only query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds"
+ - name: "max_connection_rate_per_client"
+ type: "u64"
+ default: "0"
+ lua-name: "setMaxTCPConnectionRatePerClient"
+ internal-field-name: "d_maxTCPConnectionsRatePerClient"
+ runtime-configurable: false
+ description: "Set the maximum number of new TCP connections that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connection_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``"
+ - name: "connection_rate_interval"
+ type: "u64"
+ default: "5"
+ lua-name: "setTCPConnectionRateInterval"
+ internal-field-name: "d_tcpConnectionsRatePerClientInterval"
+ runtime-configurable: false
+ description: "Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed (see ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``)"
+ - name: "max_tls_new_session_rate_per_client"
+ type: "u64"
+ default: "0"
+ lua-name: "setMaxTLSNewSessionRatePerClient"
+ internal-field-name: "d_maxTLSNewSessionsRatePerClient"
+ runtime-configurable: false
+ description: "Set the maximum number of new TLS sessions, without resumption, that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connection_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_connection_rate_per_client`` and ```max_tls_resumed_session_rate_per_client`"
+ - name: "max_tls_resumed_session_rate_per_client"
+ type: "u64"
+ default: "0"
+ lua-name: "setMaxTLSResumedSessionRatePerClient"
+ internal-field-name: "d_maxTLSResumedSessionsRatePerClient"
+ runtime-configurable: false
+ description: "Set the maximum number of resumed TLS sessions that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connection_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_connection_rate_per_client`` and ```max_tls_new_session_rate_per_client`"
+ - name: "max_read_ios_per_query"
+ type: "u32"
+ default: "50"
+ lua-name: "setMaxTCPReadIOsPerQuery"
+ internal-field-name: "d_maxTCPReadIOsPerQuery"
+ runtime-configurable: false
+ description: "Set the maximum number of read events needed to receive a new query on a TCP connection. Usually reading a DNS query over a TCP connection requires two read events, one to read the query size and one to read the query itself. For large queries, on congested networks, a few short reads might occur, increasing the number of read operations needed to read the full query, but if a large number of read events is needed the client might be misbehaving or even actively trying to hurt the server. When this limit is reached, the TCP connection will be terminated and the offending client IP (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connection_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection for up to ``ban_duration_for_exceeding_max_read_ios_per_query`` seconds"
+ - name: "ban_duration_for_exceeding_max_read_ios_per_query"
+ type: "u32"
+ default: "60"
+ lua-name: "setBanDurationForExceedingMaxReadIOsPerQuery"
+ internal-field-name: "d_tcpBanDurationForExceedingMaxReadIOsPerQuery"
+ runtime-configurable: false
+ description: "Set for how long, in seconds, a client (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connection_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_read_ios_per_query`` over a TCP connection"
+ - name: "ban_duration_for_exceeding_tcp_tls_rate"
+ type: "u32"
+ default: "10"
+ lua-name: "setBanDurationForExceedingTCPTLSRate"
+ internal-field-name: "d_tcpBanDurationForExceedingTCPTLSRate"
+ runtime-configurable: false
+ description: "Set for how long, in seconds, a client (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connection_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` or ``max_tls_resumed_session_rate_per_client``"
+ - name: "connections_mask_v4"
+ type: "u8"
+ default: "32"
+ lua-name: "setTCPConnectionsMaskV4"
+ internal-field-name: "d_tcpConnectionsMaskV4"
+ runtime-configurable: false
+ description: "Mask to apply to IPv4 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. In some scenarios it might make sense to apply these settings to a /28 range rather than a single address, for example"
+ - name: "connections_mask_v6"
+ type: "u8"
+ default: "128"
+ lua-name: "setTCPConnectionsMaskV6"
+ internal-field-name: "d_tcpConnectionsMaskV6"
+ runtime-configurable: false
+ description: "Mask to apply to IPv6 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. In some scenarios it might make sense to apply these settings to a whole /64 IPv6 range instead of a single address, for example"
+ - name: "connections_mask_v4_port"
+ type: "u8"
+ default: "0"
+ lua-name: "setTCPConnectionsMaskV4Port"
+ internal-field-name: "d_tcpConnectionsMaskV4Port"
+ runtime-configurable: false
+ description: "Number of bits of port to consider when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client`` over IPv4, for CGNAT deployments. Default is 0 meaning that the port is not taken into account. For example passing ``2`` here, which only makes sense if ``connections_mask_v4`` is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``"
udp_tuning:
category: "tuning.udp"
void resetForNewQuery();
- boost::optional<struct timeval> getClientReadTTD(struct timeval now) const
- {
- const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration();
- if (runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpRecvTimeout == 0) {
- return boost::none;
- }
-
- if (runtimeConfiguration.d_maxTCPConnectionDuration > 0) {
- auto elapsed = now.tv_sec - d_connectionStartTime.tv_sec;
- if (elapsed < 0 || (static_cast<size_t>(elapsed) >= runtimeConfiguration.d_maxTCPConnectionDuration)) {
- return now;
- }
- auto remaining = runtimeConfiguration.d_maxTCPConnectionDuration - elapsed;
- if (runtimeConfiguration.d_tcpRecvTimeout == 0 || remaining <= static_cast<size_t>(runtimeConfiguration.d_tcpRecvTimeout)) {
- now.tv_sec += remaining;
- return now;
- }
- }
-
- now.tv_sec += runtimeConfiguration.d_tcpRecvTimeout;
- return now;
- }
-
- boost::optional<struct timeval> getClientWriteTTD(const struct timeval& now) const
- {
- const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration();
- if (runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpSendTimeout == 0) {
- return boost::none;
- }
-
- timeval res(now);
-
- if (runtimeConfiguration.d_maxTCPConnectionDuration > 0) {
- auto elapsed = res.tv_sec - d_connectionStartTime.tv_sec;
- if (elapsed < 0 || static_cast<size_t>(elapsed) >= runtimeConfiguration.d_maxTCPConnectionDuration) {
- return res;
- }
- auto remaining = runtimeConfiguration.d_maxTCPConnectionDuration - elapsed;
- if (runtimeConfiguration.d_tcpSendTimeout == 0 || remaining <= static_cast<size_t>(runtimeConfiguration.d_tcpSendTimeout)) {
- res.tv_sec += remaining;
- return res;
- }
- }
-
- res.tv_sec += runtimeConfiguration.d_tcpSendTimeout;
- return res;
- }
-
- bool maxConnectionDurationReached(unsigned int maxConnectionDuration, const struct timeval& now)
- {
- if (maxConnectionDuration) {
- time_t curtime = now.tv_sec;
- unsigned int elapsed = 0;
- if (curtime > d_connectionStartTime.tv_sec) { // To prevent issues when time goes backward
- elapsed = curtime - d_connectionStartTime.tv_sec;
- }
- if (elapsed >= maxConnectionDuration) {
- return true;
- }
- }
-
- return false;
- }
+ boost::optional<timeval> getClientReadTTD(timeval now) const;
+ boost::optional<timeval> getClientWriteTTD(const timeval& now) const;
+ bool maxConnectionDurationReached(unsigned int maxConnectionDuration, const timeval& now) const;
std::shared_ptr<TCPConnectionToBackend> getDownstreamConnection(std::shared_ptr<DownstreamState>& backend, const std::unique_ptr<std::vector<ProxyProtocolValue>>& tlvs, const struct timeval& now);
void registerOwnedDownstreamConnection(std::shared_ptr<TCPConnectionToBackend>& conn);
IOState handleIncomingQueryReceived(const struct timeval& now);
void handleExceptionDuringIO(const std::exception& exp);
bool readIncomingQuery(const timeval& now, IOState& iostate);
+ bool isNearTCPLimits() const;
enum class State : uint8_t { starting, doingHandshake, readingProxyProtocolHeader, waitingForQuery, readingQuerySize, readingQuery, sendingResponse, idle /* in case of XFR, we stop processing queries */ };
std::unique_ptr<IOStateHandler> d_ioState{nullptr};
std::unique_ptr<std::vector<ProxyProtocolValue>> d_proxyProtocolValues{nullptr};
TCPClientThreadData& d_threadData;
+ uint64_t d_readIOsTotal{0};
size_t d_currentPos{0};
size_t d_proxyProtocolNeed{0};
size_t d_queriesCount{0};
size_t d_currentQueriesCount{0};
std::thread::id d_creatorThreadID;
uint16_t d_querySize{0};
+ uint16_t d_readIOsCurrentQuery{0};
State d_state{State::starting};
bool d_isXFR{false};
bool d_proxyProtocolPayloadHasTLV{false};
std::atomic<uint64_t> g_tcpStatesDumpRequested{0};
-LockGuarded<std::map<ComboAddress, size_t, ComboAddress::addressOnlyLessThan>> dnsdist::IncomingConcurrentTCPConnectionsManager::s_tcpClientsConcurrentConnectionsCount;
-
IncomingTCPConnectionState::~IncomingTCPConnectionState()
{
dnsdist::IncomingConcurrentTCPConnectionsManager::accountClosedTCPConnection(d_ci.remote);
gettimeofday(&now, nullptr);
auto diff = now - d_connectionStartTime;
- d_ci.cs->updateTCPMetrics(d_queriesCount, diff.tv_sec * 1000 + diff.tv_usec / 1000);
+ d_ci.cs->updateTCPMetrics(d_queriesCount, diff.tv_sec * 1000 + diff.tv_usec / 1000, d_queriesCount > 0 ? d_readIOsTotal / d_queriesCount : d_readIOsTotal);
}
// would have been done when the object is destroyed anyway,
return {nullptr, tlvsMismatch};
}
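+/* a connection is near TCP limits when it was flagged as restricted at accept time, when its frontend is close to its concurrent connections limit, or when the client itself is close to its per-client limit */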
+bool IncomingTCPConnectionState::isNearTCPLimits() const
+{
+ if (d_ci.d_restricted) {
+ return true;
+ }
+
+ const auto tcpConnectionsOverloadThreshold = dnsdist::configuration::getImmutableConfiguration().d_tcpConnectionsOverloadThreshold;
+ if (tcpConnectionsOverloadThreshold == 0) {
+ return false;
+ }
+
+ const auto& clientState = d_ci.cs;
+ if (clientState->d_tcpConcurrentConnectionsLimit > 0) {
+ auto concurrentConnections = clientState->tcpCurrentConnections.load();
+ auto current = (100 * concurrentConnections) / clientState->d_tcpConcurrentConnectionsLimit;
+ if (current >= tcpConnectionsOverloadThreshold) {
+ return true;
+ }
+ }
+
+ return dnsdist::IncomingConcurrentTCPConnectionsManager::isClientOverThreshold(d_ci.remote);
+}
+
std::shared_ptr<TCPConnectionToBackend> IncomingTCPConnectionState::getDownstreamConnection(std::shared_ptr<DownstreamState>& backend, const std::unique_ptr<std::vector<ProxyProtocolValue>>& tlvs, const struct timeval& now)
{
auto [downstream, tlvsMismatch] = getOwnedDownstreamConnection(d_ownedConnectionsToBackend, backend, tlvs);
return false;
}
+ if (isNearTCPLimits()) {
+ d_ci.d_restricted = true;
+ DEBUGLOG("not accepting new queries because we already near our TCP limits");
+ return false;
+ }
+
// for DoH, this is already handled by the underlying library
if (!d_ci.cs->dohFrontend && d_currentQueriesCount >= d_ci.cs->d_maxInFlightQueriesPerConn) {
DEBUGLOG("not accepting new queries because we already have " << d_currentQueriesCount << " out of " << d_ci.cs->d_maxInFlightQueriesPerConn);
d_currentPos = 0;
d_querySize = 0;
d_state = State::waitingForQuery;
+ d_readIOsTotal += d_readIOsCurrentQuery;
+ d_readIOsCurrentQuery = 0;
+}
+
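+/* compute the next read deadline; when the connection is near TCP limits the receive timeout is shortened to 500 ms and the total duration of the connection is capped at 5 seconds */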
+boost::optional<timeval> IncomingTCPConnectionState::getClientReadTTD(timeval now) const
+{
+ const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration();
+ if (!isNearTCPLimits() && runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpRecvTimeout == 0) {
+ return boost::none;
+ }
+
+ size_t maxTCPConnectionDuration = runtimeConfiguration.d_maxTCPConnectionDuration;
+ uint16_t tcpRecvTimeout = runtimeConfiguration.d_tcpRecvTimeout;
+ uint32_t tcpRecvTimeoutUsec = 0U;
+ if (isNearTCPLimits()) {
+ constexpr size_t maxTCPConnectionDurationNearLimits = 5U;
+ constexpr uint32_t tcpRecvTimeoutUsecNearLimits = 500U * 1000U;
+ maxTCPConnectionDuration = runtimeConfiguration.d_maxTCPConnectionDuration != 0 ? std::min(runtimeConfiguration.d_maxTCPConnectionDuration, maxTCPConnectionDurationNearLimits) : maxTCPConnectionDurationNearLimits;
+ tcpRecvTimeout = 0;
+ tcpRecvTimeoutUsec = tcpRecvTimeoutUsecNearLimits;
+ }
+
+ if (maxTCPConnectionDuration > 0) {
+ auto elapsed = now.tv_sec - d_connectionStartTime.tv_sec;
+ if (elapsed < 0 || (static_cast<size_t>(elapsed) >= maxTCPConnectionDuration)) {
+ return now;
+ }
+ auto remaining = maxTCPConnectionDuration - elapsed;
+ if (!isNearTCPLimits() && (runtimeConfiguration.d_tcpRecvTimeout == 0 || remaining <= static_cast<size_t>(runtimeConfiguration.d_tcpRecvTimeout))) {
+ now.tv_sec += static_cast<time_t>(remaining);
+ return now;
+ }
+ }
+
+ now.tv_sec += static_cast<time_t>(tcpRecvTimeout);
+ now.tv_usec += tcpRecvTimeoutUsec;
+ normalizeTV(now);
+ return now;
+}
+
+boost::optional<timeval> IncomingTCPConnectionState::getClientWriteTTD(const timeval& now) const
+{
+ const auto& runtimeConfiguration = dnsdist::configuration::getCurrentRuntimeConfiguration();
+ if (runtimeConfiguration.d_maxTCPConnectionDuration == 0 && runtimeConfiguration.d_tcpSendTimeout == 0) {
+ return boost::none;
+ }
+
+ timeval res(now);
+
+ if (runtimeConfiguration.d_maxTCPConnectionDuration > 0) {
+ auto elapsed = res.tv_sec - d_connectionStartTime.tv_sec;
+ if (elapsed < 0 || static_cast<size_t>(elapsed) >= runtimeConfiguration.d_maxTCPConnectionDuration) {
+ return res;
+ }
+ auto remaining = runtimeConfiguration.d_maxTCPConnectionDuration - elapsed;
+ if (runtimeConfiguration.d_tcpSendTimeout == 0 || remaining <= static_cast<size_t>(runtimeConfiguration.d_tcpSendTimeout)) {
+ res.tv_sec += static_cast<time_t>(remaining);
+ return res;
+ }
+ }
+
+ res.tv_sec += static_cast<time_t>(runtimeConfiguration.d_tcpSendTimeout);
+ return res;
+}
+
+bool IncomingTCPConnectionState::maxConnectionDurationReached(unsigned int maxConnectionDuration, const timeval& now) const
+{
+ if (maxConnectionDuration > 0) {
+ time_t curtime = now.tv_sec;
+ unsigned int elapsed = 0;
+ if (curtime > d_connectionStartTime.tv_sec) { // To prevent issues when time goes backward
+ elapsed = curtime - d_connectionStartTime.tv_sec;
+ }
+ if (elapsed >= maxConnectionDuration) {
+ return true;
+ }
+ }
+
+ return false;
}
void IncomingTCPConnectionState::registerOwnedDownstreamConnection(std::shared_ptr<TCPConnectionToBackend>& conn)
if (d_handler.isTLS()) {
if (!d_handler.hasTLSSessionBeenResumed()) {
++d_ci.cs->tlsNewSessions;
+ dnsdist::IncomingConcurrentTCPConnectionsManager::accountTLSNewSession(d_ci.remote);
}
else {
++d_ci.cs->tlsResumptions;
+ dnsdist::IncomingConcurrentTCPConnectionsManager::accountTLSResumedSession(d_ci.remote);
}
if (d_handler.getResumedFromInactiveTicketKey()) {
++d_ci.cs->tlsInactiveTicketKey;
if (!d_lastIOBlocked && (d_state == State::waitingForQuery || d_state == State::readingQuerySize)) {
DEBUGLOG("reading query size");
d_buffer.resize(sizeof(uint16_t));
+ d_readIOsCurrentQuery++;
iostate = d_handler.tryRead(d_buffer, d_currentPos, sizeof(uint16_t));
if (d_currentPos > 0) {
/* if we got at least one byte, we can't go around sending responses */
if (!d_lastIOBlocked && d_state == State::readingQuery) {
DEBUGLOG("reading query");
+ d_readIOsCurrentQuery++;
iostate = d_handler.tryRead(d_buffer, d_currentPos, d_querySize);
if (iostate == IOState::Done) {
iostate = handleIncomingQueryReceived(now);
return;
}
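+ /* reading a query normally takes two read events (length prefix plus payload); a client needing many more is likely stalling or misbehaving, so terminate the connection and ban the client */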
+ const auto& immutable = dnsdist::configuration::getImmutableConfiguration();
+ if (d_readIOsCurrentQuery >= immutable.d_maxTCPReadIOsPerQuery) {
+ vinfolog("Terminating TCP connection from %s for reaching the maximum number of read IO events per query (%d)", d_ci.remote.toStringWithPort(), immutable.d_maxTCPReadIOsPerQuery);
+ dnsdist::IncomingConcurrentTCPConnectionsManager::banClientFor(d_ci.remote, time(nullptr), immutable.d_tcpBanDurationForExceedingMaxReadIOsPerQuery);
+ return;
+ }
+
d_lastIOBlocked = false;
try {
try {
t_downstreamTCPConnectionsManager.cleanupClosedConnections(now);
+ dnsdist::IncomingConcurrentTCPConnectionsManager::cleanup(time(nullptr));
if (now.tv_sec > lastTimeoutScan) {
lastTimeoutScan = now.tv_sec;
return;
}
- if (!dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote)) {
- vinfolog("Dropping TCP connection from %s because we have too many from this client already", remote.toStringWithPort());
+ auto connectionResult = dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote, connInfo.cs->hasTLS());
+ if (connectionResult == dnsdist::IncomingConcurrentTCPConnectionsManager::NewConnectionResult::Denied) {
return;
}
tcpClientCountIncremented = true;
+ if (connectionResult == dnsdist::IncomingConcurrentTCPConnectionsManager::NewConnectionResult::Restricted) {
+ connInfo.d_restricted = true;
+ }
vinfolog("Got TCP connection from %s", remote.toStringWithPort());
ComboAddress remote;
ClientState* cs{nullptr};
int fd{-1};
+ bool d_restricted{false};
};
class InternalQuery
output << "# TYPE " << frontsbase << "tcpavgqueriesperconnection " << "gauge" << "\n";
output << "# HELP " << frontsbase << "tcpavgconnectionduration " << "The average duration of a TCP connection (ms)" << "\n";
output << "# TYPE " << frontsbase << "tcpavgconnectionduration " << "gauge" << "\n";
+ output << "# HELP " << frontsbase << "tcpavgreadios " << "The average number of read IO operations per query over a TCP connection" << "\n";
+ output << "# TYPE " << frontsbase << "tcpavgreadios " << "gauge" << "\n";
output << "# HELP " << frontsbase << "tlsqueries " << "Number of queries received by dnsdist over TLS, by TLS version" << "\n";
output << "# TYPE " << frontsbase << "tlsqueries " << "counter" << "\n";
output << "# HELP " << frontsbase << "tlsnewsessions " << "Amount of new TLS sessions negotiated" << "\n";
output << frontsbase << "tcpmaxconcurrentconnections" << label << front->tcpMaxConcurrentConnections.load() << "\n";
output << frontsbase << "tcpavgqueriesperconnection" << label << front->tcpAvgQueriesPerConnection.load() << "\n";
output << frontsbase << "tcpavgconnectionduration" << label << front->tcpAvgConnectionDuration.load() << "\n";
+ output << frontsbase << "tcpavgreadios" << label << front->tcpAvgIOsPerConnection << "\n";
if (front->hasTLS()) {
output << frontsbase << "tlsnewsessions" << label << front->tlsNewSessions.load() << "\n";
output << frontsbase << "tlsresumptions" << label << front->tlsResumptions.load() << "\n";
stat_t tls12queries{0}; // valid DNS queries received via TLSv1.2
stat_t tls13queries{0}; // valid DNS queries received via TLSv1.3
stat_t tlsUnknownqueries{0}; // valid DNS queries received via unknown TLS version
+ pdns::stat_double_t tcpAvgIOsPerConnection{0.0};
pdns::stat_double_t tcpAvgQueriesPerConnection{0.0};
/* in ms */
pdns::stat_double_t tcpAvgConnectionDuration{0.0};
d_filter = bpf;
}
- void updateTCPMetrics(size_t nbQueries, uint64_t durationMs)
+ void updateTCPMetrics(size_t nbQueries, uint64_t durationMs, size_t nbIOs)
{
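+ /* exponentially weighted moving averages: keep 99% of the previous value and add 1% of the new sample */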
tcpAvgQueriesPerConnection = (99.0 * tcpAvgQueriesPerConnection / 100.0) + (nbQueries / 100.0);
tcpAvgConnectionDuration = (99.0 * tcpAvgConnectionDuration / 100.0) + (durationMs / 100.0);
+ tcpAvgIOsPerConnection = (99.0 * tcpAvgIOsPerConnection / 100.0) + (nbIOs / 100.0);
}
};
Tuning related functions
========================
+.. function:: setBanDurationForExceedingMaxReadIOsPerQuery(num)
+
+ .. versionadded:: 2.0.0
+
+ Set for how long, in seconds, a client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded :func:`setMaxTCPReadIOsPerQuery` over a TCP connection. Default is 60 seconds.
+
+ :param int num: Duration of the ban in seconds
+
+.. function:: setBanDurationForExceedingTCPTLSRate(num)
+
+ .. versionadded:: 2.0.0
+
+ Set for how long, in seconds, a client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` or :func:`setMaxTLSResumedSessionRatePerClient`. Default is 10 seconds.
+
+ :param int num: Duration of the ban in seconds
+
.. function:: setDoHDownstreamCleanupInterval(interval)
.. versionadded:: 1.7.0
.. function:: setMaxTCPConnectionDuration(num)
- Set the maximum duration of an incoming TCP connection, in seconds. 0 (the default) means unlimited
+ Set the maximum duration of an incoming TCP connection, in seconds. 0 (the default) means unlimited.
:param int num:
+.. function:: setMaxTCPConnectionRatePerClient(num)
+
+ .. versionadded:: 2.0.0
+
+ Set the maximum number of new TCP connections that a given client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) can open, per second, over the last :func:`setTCPConnectionRateInterval` minutes. Clients exceeding this rate will not be able to open new TCP connections for :func:`setBanDurationForExceedingTCPTLSRate` seconds. See also :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`. 0 (the default) means unlimited.
+
+ :param int num: Number of new connections per second
+
.. function:: setMaxTCPConnectionsPerClient(num)
- Set the maximum number of TCP connections per client. 0 (the default) means unlimited
+ Set the maximum number of TCP connections per client. 0 (the default) means unlimited.
:param int num:
.. function:: setMaxTCPQueriesPerConnection(num)
- Set the maximum number of queries in an incoming TCP connection. 0 (the default) means unlimited
+ Set the maximum number of queries in an incoming TCP connection. 0 (the default) means unlimited.
:param int num:
.. versionchanged:: 1.6.0
Before 1.6.0 the default value was 1000 on all systems.
- Set the maximum number of TCP connections queued (waiting to be picked up by a client thread), defaults to 1000 (10000 on Linux since 1.6.0). 0 means unlimited
+ Set the maximum number of TCP connections queued (waiting to be picked up by a client thread), defaults to 1000 (10000 on Linux since 1.6.0). 0 means unlimited.
:param int num:
+.. function:: setMaxTCPReadIOsPerQuery(num)
+
+ .. versionadded:: 2.0.0
+
+ Set the maximum number of read events needed to receive a new query on a TCP connection. Usually reading a DNS query over a TCP connection requires two read events, one to read the query size and one to read the query itself. For large queries, on congested networks, a few short reads might occur, increasing the number of read operations needed to read the full query, but if a large number of read events is needed the client might be misbehaving or even actively trying to hurt the server. When this limit is reached, the TCP connection will be terminated and the offending client IP (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) will be prevented from opening a new TCP connection for up to :func:`setBanDurationForExceedingMaxReadIOsPerQuery` seconds. Default is 50.
+
+ :param int num: Number of read IO events per query
+
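+   For example, a minimal sketch tightening this limit together with the corresponding ban duration (the values are purely illustrative):
+
+   .. code-block:: lua
+
+     -- terminate a TCP connection after 20 read events for a single query
+     setMaxTCPReadIOsPerQuery(20)
+     -- prevent the offending client from reconnecting for 120 seconds
+     setBanDurationForExceedingMaxReadIOsPerQuery(120)
+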
.. function:: setMaxUDPOutstanding(num)
.. versionchanged:: 1.4.0
Before 1.4.0 the default value was 10240
- Set the maximum number of outstanding UDP queries to a given backend server. This can only be set at configuration time and defaults to 65535 (10240 before 1.4.0)
+ Set the maximum number of outstanding UDP queries to a given backend server. This can only be set at configuration time and defaults to 65535 (10240 before 1.4.0).
:param int num:
+.. function:: setMaxTLSNewSessionRatePerClient(num)
+
+ .. versionadded:: 2.0.0
+
+   Set the maximum number of new TLS sessions, without resumption, that a given client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) can open, per second, over the last :func:`setTCPConnectionRateInterval` minutes. Clients exceeding this rate will not be able to open new TCP connections for :func:`setBanDurationForExceedingTCPTLSRate` seconds. See also :func:`setMaxTLSResumedSessionRatePerClient` and :func:`setMaxTCPConnectionRatePerClient`. 0 (the default) means unlimited.
+
+  :param int num: Number of new sessions per second
+
+.. function:: setMaxTLSResumedSessionRatePerClient(num)
+
+ .. versionadded:: 2.0.0
+
+   Set the maximum number of resumed TLS sessions that a given client (or range, see :func:`setTCPConnectionsMaskV4`, :func:`setTCPConnectionsMaskV6` and :func:`setTCPConnectionsMaskV4Port` to see how clients can be aggregated) can open, per second, over the last :func:`setTCPConnectionRateInterval` minutes. Clients exceeding this rate will not be able to open new TCP connections for :func:`setBanDurationForExceedingTCPTLSRate` seconds. See also :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTCPConnectionRatePerClient`. 0 (the default) means unlimited.
+
+  :param int num: Number of resumed sessions per second
+
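+   A minimal sketch limiting both kinds of TLS session rates (illustrative values):
+
+   .. code-block:: lua
+
+     -- at most 10 full TLS handshakes per second and per client
+     setMaxTLSNewSessionRatePerClient(10)
+     -- resumed sessions are much cheaper, so allow a higher rate
+     setMaxTLSResumedSessionRatePerClient(100)
+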
.. function:: setCacheCleaningDelay(num)
- Set the interval in seconds between two runs of the cache cleaning algorithm, removing expired entries. Default is every 60s
+ Set the interval in seconds between two runs of the cache cleaning algorithm, removing expired entries. Default is every 60s.
:param int num:
.. function:: setCacheCleaningPercentage(num)
- Set the percentage of the cache that the cache cleaning algorithm will try to free by removing expired entries. By default (100), all expired entries are removed
+ Set the percentage of the cache that the cache cleaning algorithm will try to free by removing expired entries. By default (100), all expired entries are removed.
:param int num:
.. function:: setStaleCacheEntriesTTL(num)
- Allows using cache entries expired for at most n seconds when no backend available to answer for a query
+   Allows using cache entries expired for at most n seconds when no backend is available to answer for a query.
:param int num:
+.. function:: setTCPConnectionRateInterval(num)
+
+ .. versionadded:: 2.0.0
+
+ Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed (see :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`). Default is 5.
+
+ :param int num: Interval in minutes
+
+.. function:: setTCPConnectionsMaskV4(num)
+
+ .. versionadded:: 2.0.0
+
+ Mask to apply to IPv4 addresses when enforcing :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`. In some scenarios it might make sense to apply these settings to a /28 range rather than a single address, for example. Default is 32.
+
+ :param int num: Number of bits to keep
+
+.. function:: setTCPConnectionsMaskV4Port(num)
+
+ .. versionadded:: 2.0.0
+
+   Number of bits of the port number to consider when enforcing :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient` over IPv4 addresses, for CGNAT deployments. Default is 0, meaning that the port is not taken into account. For example, passing ``2`` here, which only makes sense if :func:`setTCPConnectionsMaskV4` is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``.
+
+ :param int num: Number of bits to keep
+
+.. function:: setTCPConnectionsMaskV6(num)
+
+ .. versionadded:: 2.0.0
+
+ Mask to apply to IPv6 addresses when enforcing :func:`setMaxTCPConnectionRatePerClient`, :func:`setMaxTLSNewSessionRatePerClient` and :func:`setMaxTLSResumedSessionRatePerClient`. In some scenarios it might make sense to apply these settings to a whole /64 IPv6 range rather than a single address, for example. Default is 128.
+
+ :param int num: Number of bits to keep
+
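+   As an illustration only, a CGNAT-style deployment might combine the masks documented above like this (the values are a sketch, not a recommendation):
+
+   .. code-block:: lua
+
+     -- keep the full IPv4 address and 2 bits of the source port,
+     -- splitting each address into the ranges 0-16383, 16384-32767,
+     -- 32768-49151 and 49152-65535
+     setTCPConnectionsMaskV4(32)
+     setTCPConnectionsMaskV4Port(2)
+     -- aggregate IPv6 clients per /64
+     setTCPConnectionsMaskV6(64)
+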
+.. function:: setTCPConnectionsOverloadThreshold(num)
+
+ .. versionadded:: 2.0.0
+
+   Set a threshold as a percentage of the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted: only one query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds. Default is 90.
+
+ :param int num: Threshold in percent
+
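+   For instance, a sketch lowering the threshold so that restrictions kick in earlier (illustrative value):
+
+   .. code-block:: lua
+
+     -- restrict new TCP connections once 75% of the allowed connections
+     -- (per frontend or per client) are already in use
+     setTCPConnectionsOverloadThreshold(75)
+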
.. function:: setTCPDownstreamCleanupInterval(interval)
.. versionadded:: 1.6.0
.. function:: setTCPRecvTimeout(num)
- Set the read timeout on TCP connections from the client, in seconds. Defaults to 2
+ Set the read timeout on TCP connections from the client, in seconds. Defaults to 2.
:param int num:
.. function:: setTCPSendTimeout(num)
- Set the write timeout on TCP connections from the client, in seconds. Defaults to 2
+ Set the write timeout on TCP connections from the client, in seconds. Defaults to 2.
:param int num:
.. function:: setUDPTimeout(num)
- Set the maximum time dnsdist will wait for a response from a backend over UDP, in seconds. Defaults to 2
+ Set the maximum time dnsdist will wait for a response from a backend over UDP, in seconds. Defaults to 2.
:param int num:
- **outgoing_max_idle_connection_per_backend**: Unsigned integer ``(10)``
- **max_connections_per_client**: Unsigned integer ``(0)``
- **fast_open_key**: String ``("")``
+- **connections_overload_threshold**: Unsigned integer ``(90)`` - Set a threshold as a percentage of the maximum number of incoming TCP connections per frontend or per client. When this threshold is reached, new incoming TCP connections are restricted: only one query per connection is allowed (no out-of-order processing, no idle time allowed), the receive timeout is reduced to 500 milliseconds and the total duration of the TCP connection is limited to 5 seconds
+- **max_connection_rate_per_client**: Unsigned integer ``(0)`` - Set the maximum number of new TCP connections that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``
+- **connection_rate_interval**: Unsigned integer ``(5)`` - Set the interval, in minutes, over which new TCP and TLS per client connection rates are computed (see ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``)
+- **max_tls_new_session_rate_per_client**: Unsigned integer ``(0)`` - Set the maximum number of new TLS sessions, without resumption, that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_connection_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``
+- **max_tls_resumed_session_rate_per_client**: Unsigned integer ``(0)`` - Set the maximum number of resumed TLS sessions that a given client (see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) can open, per second, over the last ``connection_rate_interval`` minutes. Clients exceeding this rate will not be able to open new TCP connections for ``ban_duration_for_exceeding_tcp_tls_rate`` seconds. See also ``max_connection_rate_per_client`` and ``max_tls_new_session_rate_per_client``
+- **max_read_ios_per_query**: Unsigned integer ``(50)`` - Set the maximum number of read events needed to receive a new query on a TCP connection. Usually reading a DNS query over a TCP connection requires two read events, one to read the query size and one to read the query itself. For large queries, on congested networks, a few short reads might occur, increasing the number of read operations needed to read the full query, but if a large number of read events is needed, the client might be misbehaving or even actively trying to hurt the server. When this limit is reached, the TCP connection will be terminated and the offending client IP (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection for up to ``ban_duration_for_exceeding_max_read_ios_per_query`` seconds
+- **ban_duration_for_exceeding_max_read_ios_per_query**: Unsigned integer ``(60)`` - Set for how long, in seconds, a client (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_read_ios_per_query`` over a TCP connection
+- **ban_duration_for_exceeding_tcp_tls_rate**: Unsigned integer ``(10)`` - Set for how long, in seconds, a client (or range, see ``connections_mask_v4``, ``connections_mask_v6`` and ``connections_mask_v4_port`` to see how clients can be aggregated) will be prevented from opening a new TCP connection when it has exceeded ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` or ``max_tls_resumed_session_rate_per_client``
+- **connections_mask_v4**: Unsigned integer ``(32)`` - Mask to apply to IPv4 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. In some scenarios it might make sense to apply these settings to a /28 range rather than a single address, for example
+- **connections_mask_v6**: Unsigned integer ``(128)`` - Mask to apply to IPv6 addresses when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client``. In some scenarios it might make sense to apply these settings to a whole /64 IPv6 range instead of a single address, for example
+- **connections_mask_v4_port**: Unsigned integer ``(0)`` - Number of bits of port to consider when enforcing ``max_connection_rate_per_client``, ``max_tls_new_session_rate_per_client`` and ``max_tls_resumed_session_rate_per_client`` over IPv4, for CGNAT deployments. Default is 0, meaning that the port is not taken into account. For example, passing ``2`` here, which only makes sense if ``connections_mask_v4`` is set to ``32``, will split a given IPv4 address into four port ranges: ``0-16383``, ``16384-32767``, ``32768-49151`` and ``49152-65535``
.. _yaml-settings-TlsEngineConfiguration:
auto diff = now - conn->d_connectionStartTime;
conn->d_acceptCtx->decrementConcurrentConnections();
- conn->d_acceptCtx->d_cs->updateTCPMetrics(conn->d_nbQueries, diff.tv_sec * 1000 + diff.tv_usec / 1000);
+ conn->d_acceptCtx->d_cs->updateTCPMetrics(conn->d_nbQueries, diff.tv_sec * 1000 + diff.tv_usec / 1000, 0);
}
dnsdist::IncomingConcurrentTCPConnectionsManager::accountClosedTCPConnection(conn->d_remote);
return;
}
- if (!dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote)) {
- vinfolog("Dropping DoH connection from %s because we have too many from this client already", remote.toStringWithPort());
+ auto connectionResult = dnsdist::IncomingConcurrentTCPConnectionsManager::accountNewTCPConnection(remote, false);
+ if (connectionResult == dnsdist::IncomingConcurrentTCPConnectionsManager::NewConnectionResult::Denied) {
h2o_socket_close(sock);
return;
}
src_dir / 'dnsdist-backend.cc',
src_dir / 'dnsdist-cache.cc',
src_dir / 'dnsdist-carbon.cc',
+ src_dir / 'dnsdist-concurrent-connections.cc',
src_dir / 'dnsdist-configuration.cc',
src_dir / 'dnsdist-configuration-yaml.cc',
src_dir / 'dnsdist-console.cc',
_maxTCPConnsPerClient = 3
_maxTCPConnDuration = 5
_config_template = """
- newServer{address="127.0.0.1:%s"}
- setTCPRecvTimeout(%s)
- setMaxTCPQueriesPerConnection(%s)
- setMaxTCPConnectionsPerClient(%s)
- setMaxTCPConnectionDuration(%s)
+ newServer{address="127.0.0.1:%d"}
+ setTCPRecvTimeout(%d)
+ setMaxTCPQueriesPerConnection(%d)
+ setMaxTCPConnectionsPerClient(%d)
+ setMaxTCPConnectionDuration(%d)
+ -- disable "near limits" otherwise our tests are broken because connections are forcibly closed
+ setTCPConnectionsOverloadThreshold(0)
"""
_config_params = ['_testServerPort', '_tcpIdleTimeout', '_maxTCPQueriesPerConn', '_maxTCPConnsPerClient', '_maxTCPConnDuration']
_verboseMode = True
_tcpIdleTimeout = 2
_maxTCPConnsPerFrontend = 10
_config_template = """
- newServer{address="127.0.0.1:%s"}
+ newServer{address="127.0.0.1:%d"}
setLocal("%s:%d", {maxConcurrentTCPConnections=%d})
+ -- disable "near limits" otherwise our tests are broken because connections are forcibly closed
+ setTCPConnectionsOverloadThreshold(0)
"""
_config_params = ['_testServerPort', '_dnsDistListeningAddr', '_dnsDistPort', '_maxTCPConnsPerFrontend']
_verboseMode = True