From: Remi Gacogne Date: Tue, 26 Aug 2025 12:00:26 +0000 (+0200) Subject: dnsdist: Speed up cache hits by skipping the LB policy when possible X-Git-Tag: rec-5.4.0-alpha1~191^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=059bd0e22be0680a87bdad09c687ba0fe65e2dbb;p=thirdparty%2Fpdns.git dnsdist: Speed up cache hits by skipping the LB policy when possible We use to execute the load-balancing policy to select a backend before doing the cache lookup, because in some corner cases the selected backend might have settings that impact our cache lookup. In practice most configurations have a consistent set of settings for all servers in a given pool, so it makes no sense to waste CPU cycles selecting a backend if we are going to get a hit from the cache. This PR adds a bit of code to check if a pool is in a consistent state, and if it is it delays the execution of the load-balancing policy to after the cache lookup, skipping it entirely for cache hits. Signed-off-by: Remi Gacogne --- diff --git a/pdns/dnsdistdist/dnsdist-backend.cc b/pdns/dnsdistdist/dnsdist-backend.cc index b55d2441a8..a04b36e3eb 100644 --- a/pdns/dnsdistdist/dnsdist-backend.cc +++ b/pdns/dnsdistdist/dnsdist-backend.cc @@ -1039,6 +1039,17 @@ size_t ServerPool::poolLoad() const return load; } +bool ServerPool::hasAtLeastOneServerAvailable() const +{ + // NOLINTNEXTLINE(readability-use-anyofallof): no it's not more readable + for (const auto& server : d_servers) { + if (std::get<1>(server)->isUp()) { + return true; + } + } + return false; +} + const ServerPolicy::NumberedServerVector& ServerPool::getServers() const { return d_servers; @@ -1058,22 +1069,15 @@ void ServerPool::addServer(std::shared_ptr& server) serv.first = idx++; } - if (d_servers.size() == 1) { - d_tcpOnly = server->isTCPOnly(); - } - else if (!server->isTCPOnly()) { - d_tcpOnly = false; - } + updateConsistency(); } void ServerPool::removeServer(shared_ptr& server) { size_t idx = 1; bool found = false; - bool tcpOnly = true; for (auto it = d_servers.begin(); it != d_servers.end();) { if (found) { - tcpOnly = tcpOnly && it->second->isTCPOnly(); /* we need to renumber the servers placed after the removed one, for Lua (custom policies) */ it->first = idx++; @@ -1083,12 +1087,61 @@ void ServerPool::removeServer(shared_ptr& server) it = d_servers.erase(it); found = true; } else { - tcpOnly = tcpOnly && it->second->isTCPOnly(); idx++; it++; } } + + if (found && !d_isConsistent) { + updateConsistency(); + } +} + +void ServerPool::updateConsistency() +{ + bool first{true}; + bool useECS{false}; + bool tcpOnly{false}; + bool zeroScope{false}; + + for (const auto& serverPair : d_servers) { + const auto& server = serverPair.second; + if (first) { + first = false; + useECS = server->d_config.useECS; + tcpOnly = server->isTCPOnly(); + zeroScope = !server->d_config.disableZeroScope; + } + else { + if (server->d_config.useECS != useECS || + server->isTCPOnly() != tcpOnly || + server->d_config.disableZeroScope == zeroScope) { + d_tcpOnly = false; + d_isConsistent = false; + return; + } + } + } + d_tcpOnly = tcpOnly; + /* at this point we know that all servers agree + on these settings, so let's just use the same + values for the pool itself */ + d_useECS = useECS; + d_zeroScope = zeroScope; + d_isConsistent = true; +} + +void ServerPool::setZeroScope(bool enabled) +{ + d_zeroScope = enabled; + updateConsistency(); +} + +void ServerPool::setECS(bool useECS) +{ + d_useECS = useECS; + updateConsistency(); } namespace dnsdist::backend diff --git a/pdns/dnsdistdist/dnsdist-lbpolicies.hh b/pdns/dnsdistdist/dnsdist-lbpolicies.hh index 5f3a50d9d8..61cc874000 100644 --- a/pdns/dnsdistdist/dnsdist-lbpolicies.hh +++ b/pdns/dnsdistdist/dnsdist-lbpolicies.hh @@ -84,19 +84,19 @@ public: return d_selected.has_value(); } - DownstreamState* operator->() const noexcept + DownstreamState* operator->() const { return (*d_backends)[*d_selected].second.get(); } - const std::shared_ptr& get() const noexcept + const std::shared_ptr& get() const { return (*d_backends)[*d_selected].second; } private: const NumberedServerVector* d_backends{nullptr}; - std::optional d_selected; + std::optional d_selected{std::nullopt}; }; SelectedBackend getSelectedBackend(const ServerPolicy::NumberedServerVector& servers, DNSQuestion& dnsQuestion) const; diff --git a/pdns/dnsdistdist/dnsdist-lua-bindings.cc b/pdns/dnsdistdist/dnsdist-lua-bindings.cc index ca14401186..48a82303f5 100644 --- a/pdns/dnsdistdist/dnsdist-lua-bindings.cc +++ b/pdns/dnsdistdist/dnsdist-lua-bindings.cc @@ -156,6 +156,31 @@ void setupLuaBindings(LuaContext& luaCtx, bool client, bool configCheck) }); } }); + luaCtx.registerFunction::*)() const>("getZeroScope", [](const std::shared_ptr& pool) { + bool zeroScope = false; + if (pool) { + dnsdist::configuration::updateRuntimeConfiguration([&pool, &zeroScope](dnsdist::configuration::RuntimeConfiguration& config) { + auto poolIt = config.d_pools.find(pool->poolName); + /* this might happen if the Server Pool has been removed in the meantime, let's gracefully ignore it */ + if (poolIt != config.d_pools.end()) { + zeroScope = poolIt->second.getZeroScope(); + } + }); + } + return zeroScope; + }); + luaCtx.registerFunction::*)(bool enabled)>("setZeroScope", [](std::shared_ptr& pool, bool enabled) { + if (pool) { + dnsdist::configuration::updateRuntimeConfiguration([&pool, enabled](dnsdist::configuration::RuntimeConfiguration& config) { + auto poolIt = config.d_pools.find(pool->poolName); + if (poolIt == config.d_pools.end()) { + /* this might happen if the Server Pool has been removed in the meantime, let's gracefully ignore it */ + return; + } + poolIt->second.setZeroScope(enabled); + }); + } + }); #ifndef DISABLE_DOWNSTREAM_BINDINGS /* DownstreamState */ diff --git a/pdns/dnsdistdist/dnsdist-server-pool.hh b/pdns/dnsdistdist/dnsdist-server-pool.hh index 84e48f171d..2336e8e9bc 100644 --- a/pdns/dnsdistdist/dnsdist-server-pool.hh +++ b/pdns/dnsdistdist/dnsdist-server-pool.hh @@ -38,13 +38,23 @@ struct ServerPool return d_useECS; } - void setECS(bool useECS) + void setECS(bool useECS); + + bool getZeroScope() const + { + return d_zeroScope; + } + + void setZeroScope(bool enabled); + + bool isConsistent() const { - d_useECS = useECS; + return d_isConsistent; } size_t poolLoad() const; size_t countServers(bool upOnly) const; + bool hasAtLeastOneServerAvailable() const; const ServerPolicy::NumberedServerVector& getServers() const; void addServer(std::shared_ptr& server); void removeServer(std::shared_ptr& server); @@ -57,7 +67,11 @@ struct ServerPool std::shared_ptr policy{nullptr}; private: + void updateConsistency(); + ServerPolicy::NumberedServerVector d_servers; bool d_useECS{false}; + bool d_zeroScope{true}; bool d_tcpOnly{false}; + bool d_isConsistent{true}; }; diff --git a/pdns/dnsdistdist/dnsdist-settings-definitions.yml b/pdns/dnsdistdist/dnsdist-settings-definitions.yml index 785c6116ad..606d61bd6e 100644 --- a/pdns/dnsdistdist/dnsdist-settings-definitions.yml +++ b/pdns/dnsdistdist/dnsdist-settings-definitions.yml @@ -2040,6 +2040,14 @@ pool: type: "String" default: "" description: "The name of the load-balancing policy associated to this pool. If left empty, the global policy will be used" + - name: "use_ecs" + type: "bool" + default: "false" + description: "Whether to add EDNS Client Subnet information to the query before looking up into the cache, when all servers from this pool are down. If at least one server is up, the preference of the selected server is used, this parameter is only useful if all the backends in this pool are down and have EDNS Client Subnet enabled, since the queries in the cache will have been inserted with ECS information" + - name: "disable_zero_scope" + type: "bool" + default: "false" + description: "Whether to disable the EDNS Client Subnet :doc:`../advanced/zero-scope` feature, which does a cache lookup for an answer valid for all subnets (ECS scope of 0) before adding ECS information to the query and doing the regular lookup, when all servers from this pool are down. If at least one server is up, the preference of the selected server is used, this parameter is only useful if all the backends in this pool are down, have EDNS Client Subnet enabled and zero scope disabled" custom_load_balancing_policy: description: "Settings for a custom load-balancing policy" diff --git a/pdns/dnsdistdist/dnsdist.cc b/pdns/dnsdistdist/dnsdist.cc index a1e58df1c7..47c5c42ff5 100644 --- a/pdns/dnsdistdist/dnsdist.cc +++ b/pdns/dnsdistdist/dnsdist.cc @@ -1439,8 +1439,14 @@ ProcessQueryResult processQueryAfterRules(DNSQuestion& dnsQuestion, std::shared_ if (dnsQuestion.getHeader()->qr) { // something turned it into a response return handleQueryTurnedIntoSelfAnsweredResponse(dnsQuestion); } + bool backendLookupDone = false; const auto& serverPool = getPool(dnsQuestion.ids.poolName); - auto selectedBackend = selectBackendForOutgoingQuery(dnsQuestion, serverPool); + ServerPolicy::SelectedBackend selectedBackend(serverPool.getServers()); + if (!serverPool.packetCache || !serverPool.isConsistent()) { + selectedBackend = selectBackendForOutgoingQuery(dnsQuestion, serverPool); + backendLookupDone = true; + } + bool willBeForwardedOverUDP = !dnsQuestion.overTCP() || dnsQuestion.ids.protocol == dnsdist::Protocol::DoH; if (selectedBackend && selectedBackend->isTCPOnly()) { willBeForwardedOverUDP = false; @@ -1449,17 +1455,22 @@ ProcessQueryResult processQueryAfterRules(DNSQuestion& dnsQuestion, std::shared_ willBeForwardedOverUDP = !serverPool.isTCPOnly(); } - uint32_t allowExpired = selectedBackend ? 0 : dnsdist::configuration::getCurrentRuntimeConfiguration().d_staleCacheEntriesTTL; + uint32_t allowExpired = 0; + if (!selectedBackend && dnsdist::configuration::getCurrentRuntimeConfiguration().d_staleCacheEntriesTTL > 0 && (backendLookupDone || !serverPool.hasAtLeastOneServerAvailable())) { + allowExpired = dnsdist::configuration::getCurrentRuntimeConfiguration().d_staleCacheEntriesTTL; + } if (serverPool.packetCache && !dnsQuestion.ids.skipCache && !dnsQuestion.ids.dnssecOK) { dnsQuestion.ids.dnssecOK = (dnsdist::getEDNSZ(dnsQuestion) & EDNS_HEADER_FLAG_DO) != 0; } - if (dnsQuestion.useECS && ((selectedBackend && selectedBackend->d_config.useECS) || (!selectedBackend && serverPool.getECS()))) { + const bool useECS = dnsQuestion.useECS && ((selectedBackend && selectedBackend->d_config.useECS) || (!selectedBackend && serverPool.getECS())); + if (useECS) { + const bool useZeroScope = (selectedBackend && !selectedBackend->d_config.disableZeroScope) || (!selectedBackend && serverPool.getZeroScope()); // we special case our cache in case a downstream explicitly gave us a universally valid response with a 0 scope // we need ECS parsing (parseECS) to be true so we can be sure that the initial incoming query did not have an existing // ECS option, which would make it unsuitable for the zero-scope feature. - if (serverPool.packetCache && !dnsQuestion.ids.skipCache && (!selectedBackend || !selectedBackend->d_config.disableZeroScope) && serverPool.packetCache->isECSParsingEnabled()) { + if (serverPool.packetCache && !dnsQuestion.ids.skipCache && useZeroScope && serverPool.packetCache->isECSParsingEnabled()) { if (serverPool.packetCache->get(dnsQuestion, dnsQuestion.getHeader()->id, &dnsQuestion.ids.cacheKeyNoECS, dnsQuestion.ids.subnet, *dnsQuestion.ids.dnssecOK, willBeForwardedOverUDP, allowExpired, false, true, false)) { vinfolog("Packet cache hit for query for %s|%s from %s (%s, %d bytes)", dnsQuestion.ids.qname.toLogString(), QType(dnsQuestion.ids.qtype).toString(), dnsQuestion.ids.origRemote.toStringWithPort(), dnsQuestion.ids.protocol.toString(), dnsQuestion.getData().size()); @@ -1542,12 +1553,17 @@ ProcessQueryResult processQueryAfterRules(DNSQuestion& dnsQuestion, std::shared_ const auto& newServerPool = getPool(dnsQuestion.ids.poolName); dnsQuestion.ids.packetCache = newServerPool.packetCache; selectedBackend = selectBackendForOutgoingQuery(dnsQuestion, newServerPool); + backendLookupDone = true; } else { dnsQuestion.ids.packetCache = serverPool.packetCache; } } + if (!backendLookupDone) { + selectedBackend = selectBackendForOutgoingQuery(dnsQuestion, serverPool); + } + if (!selectedBackend) { auto servFailOnNoPolicy = dnsdist::configuration::getCurrentRuntimeConfiguration().d_servFailOnNoPolicy; ++dnsdist::metrics::g_stats.noPolicy; diff --git a/pdns/dnsdistdist/docs/reference/config.rst b/pdns/dnsdistdist/docs/reference/config.rst index fa8c8d1488..9a7c042986 100644 --- a/pdns/dnsdistdist/docs/reference/config.rst +++ b/pdns/dnsdistdist/docs/reference/config.rst @@ -987,6 +987,13 @@ Servers that are not assigned to a specific pool get assigned to the default poo Returns the :class:`PacketCache` for this pool or nil. + .. method:: ServerPool:getDisableZeroScope() + + .. versionadded:: 2.0.1 + + Whether dnsdist will disable the EDNS Client Subnet :doc:`../advanced/zero-scope` feature when looking up into the cache, + when all servers from this pool are down. + .. method:: ServerPool:getECS() Whether dnsdist will add EDNS Client Subnet information to the query before looking up into the cache, @@ -998,9 +1005,12 @@ Servers that are not assigned to a specific pool get assigned to the default poo :param PacketCache cache: The new cache to add to the pool - .. method:: ServerPool:unsetCache() + .. method:: ServerPool:setDisableZeroScope(disable) - Removes the cache from this pool. + .. versionadded:: 2.0.1 + + Set to true if dnsdist should disable the EDNS Client Subnet :doc:`../advanced/zero-scope` feature when looking up into the cache, + when all servers from this pool are down. .. method:: ServerPool:setECS() @@ -1010,6 +1020,10 @@ Servers that are not assigned to a specific pool get assigned to the default poo and have EDNS Client Subnet enabled, since the queries in the cache will have been inserted with ECS information. Default is false. + .. method:: ServerPool:unsetCache() + + Removes the cache from this pool. + PacketCache ~~~~~~~~~~~ diff --git a/regression-tests.dnsdist/test_Caching.py b/regression-tests.dnsdist/test_Caching.py index 8483944232..3a06634df4 100644 --- a/regression-tests.dnsdist/test_Caching.py +++ b/regression-tests.dnsdist/test_Caching.py @@ -2267,13 +2267,17 @@ class TestCachingECSWithoutPoolECS(DNSDistTest): _consoleKey = DNSDistTest.generateConsoleKey() _consoleKeyB64 = base64.b64encode(_consoleKey).decode('ascii') - _config_params = ['_consoleKeyB64', '_consolePort', '_testServerPort'] + _config_params = ['_consoleKeyB64', '_consolePort', '_testServerPort', '_testServerPort'] _config_template = """ pc = newPacketCache(100, {maxTTL=86400, minTTL=1}) getPool(""):setCache(pc) setKey("%s") controlSocket("127.0.0.1:%d") newServer{address="127.0.0.1:%d", useClientSubnet=true} + -- add a second server without ECS, which will never be used + -- but makes the pool inconsistent + newServer{address="127.0.0.1:%d", useClientSubnet=false}:setDown() + getPool(""):setECS(false) """ def testCached(self): @@ -2325,10 +2329,10 @@ class TestCachingECSWithPoolECS(DNSDistTest): _config_template = """ pc = newPacketCache(100, {maxTTL=86400, minTTL=1}) getPool(""):setCache(pc) - getPool(""):setECS(true) setKey("%s") controlSocket("127.0.0.1:%d") newServer{address="127.0.0.1:%d", useClientSubnet=true} + getPool(""):setECS(true) """ def testCached(self):