From: Otto Moerbeek Date: Mon, 6 Nov 2023 08:35:01 +0000 (+0100) Subject: If a chain is long, refuse to add more entries to it (including metrics) X-Git-Tag: rec-5.1.0-beta1~22^2~9 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5648c3708db2e956a987718e0ce40633b9ebc312;p=thirdparty%2Fpdns.git If a chain is long, refuse to add more entries to it (including metrics) --- diff --git a/pdns/recursordist/RECURSOR-MIB.txt b/pdns/recursordist/RECURSOR-MIB.txt index f80eab30c0..ba4f037e8c 100644 --- a/pdns/recursordist/RECURSOR-MIB.txt +++ b/pdns/recursordist/RECURSOR-MIB.txt @@ -60,6 +60,9 @@ rec MODULE-IDENTITY REVISION "202306080000Z" DESCRIPTION "Added metrics for NOD and UDR events" + REVISION "202311060000Z" + DESCRIPTION "Added metrics for maximum chain length and weight" + ::= { powerdns 2 } powerdns OBJECT IDENTIFIER ::= { enterprises 43315 } @@ -1250,6 +1253,22 @@ udrEvents OBJECT-TYPE "Count of UDR events" ::= { stats 148 } +maxChainLength OBJECT-TYPE + SYNTAX Counter64 + MAX-ACCESS read-only + STATUS current + DESCRIPTION + "Maximum chain length" + ::= { stats 149 } + +maxChainWeight OBJECT-TYPE + SYNTAX Counter64 + MAX-ACCESS read-only + STATUS current + DESCRIPTION + "Maximum chain weight" + ::= { stats 150 } + --- --- Traps / Notifications --- @@ -1445,7 +1464,9 @@ recGroup OBJECT-GROUP packetCacheContended, packetCacheAcquired, nodEvents, - udrEvents + udrEvents, + maxChainLength, + macChainWeight } STATUS current DESCRIPTION "Objects conformance group for PowerDNS Recursor" diff --git a/pdns/recursordist/docs/metrics.rst b/pdns/recursordist/docs/metrics.rst index 6c20957165..31d05ac67c 100644 --- a/pdns/recursordist/docs/metrics.rst +++ b/pdns/recursordist/docs/metrics.rst @@ -535,6 +535,14 @@ max-cache-entries ^^^^^^^^^^^^^^^^^ currently configured maximum number of cache entries +max-chain-length +^^^^^^^^^^^^^^^^ +maximum chain length + +max-chain-weight +^^^^^^^^^^^^^^^^ +maximum chain weight. The weight of a chain of outgoing queries is the product of the number of chained queries by the size of the response received from the external authoritative server. + max-packetcache-entries ^^^^^^^^^^^^^^^^^^^^^^^ currently configured maximum number of packet cache entries diff --git a/pdns/recursordist/pdns_recursor.cc b/pdns/recursordist/pdns_recursor.cc index ad3ad7ac61..b360e94dd7 100644 --- a/pdns/recursordist/pdns_recursor.cc +++ b/pdns/recursordist/pdns_recursor.cc @@ -291,8 +291,15 @@ LWResult::Result asendto(const void* data, size_t len, int /* flags */, assert(chain.first->key->domain == pident->domain); // NOLINT // don't chain onto existing chained waiter or a chain already processed if (chain.first->key->fd > -1 && !chain.first->key->closed) { - chain.first->key->chain.insert(qid); // we can chain + if (g_maxChainLength > 0 && chain.first->key->authReqChain.size() >= g_maxChainLength) { + return LWResult::Result::OSLimitError; + } + chain.first->key->authReqChain.insert(qid); // we can chain *fileDesc = -1; // gets used in waitEvent / sendEvent later on + auto maxLength = t_Counters.at(rec::Counter::maxChainLength); + if (chain.first->key->authReqChain.size() > maxLength) { + t_Counters.at(rec::Counter::maxChainLength) = chain.first->key->authReqChain.size(); + } return LWResult::Result::Success; } } @@ -2834,13 +2841,20 @@ static void doResends(MT_t::waiters_t::iterator& iter, const std::shared_ptrkey->closed = true; - if (iter->key->chain.empty()) { + if (iter->key->authReqChain.empty()) { return; } - for (auto i = iter->key->chain.begin(); i != iter->key->chain.end(); ++i) { + + auto maxWeight = t_Counters.at(rec::Counter::maxChainWeight); + auto weight = iter->key->authReqChain.size() * content.size(); + if (weight > maxWeight) { + t_Counters.at(rec::Counter::maxChainWeight) = weight; + } + + for (auto qid: iter->key->authReqChain) { auto packetID = std::make_shared(*resend); packetID->fd = -1; - packetID->id = *i; + packetID->id = qid; g_multiTasker->sendEvent(packetID, &content); t_Counters.at(rec::Counter::chainResends)++; } diff --git a/pdns/recursordist/rec-main.cc b/pdns/recursordist/rec-main.cc index 8a5c015d3b..f808b464a5 100644 --- a/pdns/recursordist/rec-main.cc +++ b/pdns/recursordist/rec-main.cc @@ -107,6 +107,7 @@ NetmaskGroup g_proxyProtocolACL; std::set g_proxyProtocolExceptions; boost::optional g_dns64Prefix{boost::none}; DNSName g_dns64PrefixReverse; +unsigned int g_maxChainLength; std::shared_ptr g_initialDomainMap; // new threads needs this to be setup std::shared_ptr g_initialAllowFrom; // new thread needs to be setup with this std::shared_ptr g_initialAllowNotifyFrom; // new threads need this to be setup @@ -2308,6 +2309,7 @@ static int serviceMain(Logr::log_t log) g_maxUDPQueriesPerRound = ::arg().asNum("max-udp-queries-per-round"); g_useKernelTimestamp = ::arg().mustDo("protobuf-use-kernel-timestamp"); + g_maxChainLength = ::arg().asNum("max-chain-length"); disableStats(StatComponent::API, ::arg()["stats-api-blacklist"]); disableStats(StatComponent::Carbon, ::arg()["stats-carbon-blacklist"]); diff --git a/pdns/recursordist/rec-main.hh b/pdns/recursordist/rec-main.hh index 74168567a3..701cf82d97 100644 --- a/pdns/recursordist/rec-main.hh +++ b/pdns/recursordist/rec-main.hh @@ -212,6 +212,7 @@ extern double g_balancingFactor; extern size_t g_maxUDPQueriesPerRound; extern bool g_useKernelTimestamp; extern bool g_allowNoRD; +extern unsigned int g_maxChainLength; extern thread_local std::shared_ptr t_allowFrom; extern thread_local std::shared_ptr t_allowNotifyFrom; extern thread_local std::shared_ptr t_allowNotifyFor; diff --git a/pdns/recursordist/rec-snmp.cc b/pdns/recursordist/rec-snmp.cc index 19b875beb5..0684ed5a72 100644 --- a/pdns/recursordist/rec-snmp.cc +++ b/pdns/recursordist/rec-snmp.cc @@ -195,6 +195,8 @@ static const oid10 packetCacheContendedOID = {RECURSOR_STATS_OID, 145}; static const oid10 packetCacheAcquiredOID = {RECURSOR_STATS_OID, 146}; static const oid10 nodEventsOID = {RECURSOR_STATS_OID, 147}; static const oid10 udrEventsOID = {RECURSOR_STATS_OID, 148}; +static const oid10 maxChainLengthOID = {RECURSOR_STATS_OID, 149}; +static const oid10 maxChainWeightOID = {RECURSOR_STATS_OID, 150}; static std::unordered_map s_statsMap; @@ -451,6 +453,8 @@ RecursorSNMPAgent::RecursorSNMPAgent(const std::string& name, const std::string& registerCounter64Stat("packetcache-acquired", packetCacheAcquiredOID); registerCounter64Stat("nod-events", nodEventsOID); registerCounter64Stat("udr-events", udrEventsOID); + registerCounter64Stat("max-chain-length", maxChainLengthOID); + registerCounter64Stat("max-chain-weight", maxChainWeightOID); #endif /* HAVE_NET_SNMP */ } diff --git a/pdns/recursordist/rec-tcounters.hh b/pdns/recursordist/rec-tcounters.hh index 23e17efe56..bc74b05d57 100644 --- a/pdns/recursordist/rec-tcounters.hh +++ b/pdns/recursordist/rec-tcounters.hh @@ -96,6 +96,8 @@ enum class Counter : uint8_t maintenanceCalls, nodCount, udrCount, + maxChainLength, + maxChainWeight, numberOfCounters }; diff --git a/pdns/recursordist/rec_channel_rec.cc b/pdns/recursordist/rec_channel_rec.cc index 18bd991a0d..d86c097640 100644 --- a/pdns/recursordist/rec_channel_rec.cc +++ b/pdns/recursordist/rec_channel_rec.cc @@ -1566,6 +1566,9 @@ static void registerAllStats1() addGetStat("nod-events", [] { return g_Counters.sum(rec::Counter::nodCount); }); addGetStat("udr-events", [] { return g_Counters.sum(rec::Counter::udrCount); }); + addGetStat("max-chain-length", [] { return g_Counters.max(rec::Counter::maxChainLength); }); + addGetStat("max-chain-weight", [] { return g_Counters.max(rec::Counter::maxChainWeight); }); + /* make sure that the ECS stats are properly initialized */ SyncRes::clearECSStats(); for (size_t idx = 0; idx < SyncRes::s_ecsResponsesBySubnetSize4.size(); idx++) { diff --git a/pdns/recursordist/settings/table.py b/pdns/recursordist/settings/table.py index de4d398e9c..0525094dfe 100644 --- a/pdns/recursordist/settings/table.py +++ b/pdns/recursordist/settings/table.py @@ -1451,6 +1451,21 @@ and also smaller than `max-mthreads`. ''', 'versionadded': '4.3.0' }, + { + 'name': 'max_chain_length', + 'section': 'recursor', + 'type': LType.Uint64, + 'default': '0', + 'help': 'maximum number of queries that can be chained to an outgoing request, 0 is no limit', + 'doc': ''' +The maximum number of queries that can be attached to an outgoing request chain. Attaching requests to a chain +saves on outgoing queries, but the processing of a chain when the reply to the outgoing query comes in +might result in a large outgoing traffic spikes. Reducing the maximum chain length mitigate this. +If this value is zero, no maximum is enforced, though the maximum number of mthreads (:ref:`setting-max-mthreads`) +also limits the chain length. +''', + 'versionadded': ['4.8.7', '4.9.4', '5.0.3'] + }, { 'name' : 'max_include_depth', 'section' : 'recursor', diff --git a/pdns/recursordist/syncres.hh b/pdns/recursordist/syncres.hh index 471a2d0ca1..50626a37a1 100644 --- a/pdns/recursordist/syncres.hh +++ b/pdns/recursordist/syncres.hh @@ -762,7 +762,7 @@ struct PacketID PacketBuffer outMSG; // the outgoing message that needs to be sent using chain_t = set; - mutable chain_t chain; + mutable chain_t authReqChain; shared_ptr tcphandler{nullptr}; string::size_type inPos{0}; // how far are we along in the inMSG size_t inWanted{0}; // if this is set, we'll read until inWanted bytes are read diff --git a/pdns/recursordist/ws-recursor.cc b/pdns/recursordist/ws-recursor.cc index af2cbfee3e..1d5744959e 100644 --- a/pdns/recursordist/ws-recursor.cc +++ b/pdns/recursordist/ws-recursor.cc @@ -1246,6 +1246,14 @@ const std::map MetricDefinitionStorage::d_metrics {"udr-events", MetricDefinition(PrometheusMetricType::counter, "Count of UDR events")}, + + {"max-chain-length", + MetricDefinition(PrometheusMetricType::counter, + "Maximum chain length")}, + + {"max-chain-weight", + MetricDefinition(PrometheusMetricType::counter, + "Maximum chain weight")}, }; constexpr bool CHECK_PROMETHEUS_METRICS = false; diff --git a/regression-tests.recursor-dnssec/test_SNMP.py b/regression-tests.recursor-dnssec/test_SNMP.py index 4ad78640cb..baa087548d 100644 --- a/regression-tests.recursor-dnssec/test_SNMP.py +++ b/regression-tests.recursor-dnssec/test_SNMP.py @@ -21,7 +21,7 @@ class TestSNMP(RecursorTest): """ def _checkStatsValues(self, results): - count = 148 + count = 150 for i in list(range(1, count)): oid = self._snmpOID + '.1.' + str(i) + '.0' self.assertTrue(oid in results)