]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
dnsdist: Make the max size of entries in the packet cache configurable
authorRemi Gacogne <remi.gacogne@powerdns.com>
Fri, 24 Nov 2023 14:48:24 +0000 (15:48 +0100)
committerRemi Gacogne <remi.gacogne@powerdns.com>
Mon, 4 Dec 2023 12:50:55 +0000 (13:50 +0100)
It used to be set to 4096 bytes, which is also a hard limit for UDP
responses anyway, because of the internal buffer size, but the limit
can now be raised for responses received over TCP (including DoT and
DoH).

pdns/dnsdist-cache.cc
pdns/dnsdist-cache.hh
pdns/dnsdist-tcp.cc
pdns/dnsdist.cc
pdns/dnsdist.hh
pdns/dnsdistdist/dnsdist-lua-bindings-packetcache.cc
pdns/dnsdistdist/dnsdist-tcp-downstream.hh
pdns/dnsdistdist/dnsdist-tcp-upstream.hh
pdns/dnsdistdist/docs/reference/config.rst
pdns/test-dnsdistpacketcache_cc.cc
regression-tests.dnsdist/test_Caching.py

index 67de6226a0432d7362b5470249ead2e0a507f9c6..c3b0e75ef68dfb75bdda970de0c4a892232cc807 100644 (file)
@@ -119,9 +119,10 @@ void DNSDistPacketCache::insertLocked(CacheShard& shard, std::unordered_map<uint
 
 void DNSDistPacketCache::insert(uint32_t key, const boost::optional<Netmask>& subnet, uint16_t queryFlags, bool dnssecOK, const DNSName& qname, uint16_t qtype, uint16_t qclass, const PacketBuffer& response, bool receivedOverUDP, uint8_t rcode, boost::optional<uint32_t> tempFailureTTL)
 {
-  if (response.size() < sizeof(dnsheader)) {
+  if (response.size() < sizeof(dnsheader) || response.size() > getMaximumEntrySize()) {
     return;
   }
+
   if (qtype == QType::AXFR || qtype == QType::IXFR) {
     return;
   }
@@ -620,3 +621,8 @@ std::set<ComboAddress> DNSDistPacketCache::getRecordsForDomain(const DNSName& do
 
   return addresses;
 }
+
+void DNSDistPacketCache::setMaximumEntrySize(size_t maxSize)
+{
+  d_maximumEntrySize = maxSize;
+}
index 5b2ba2c78d4f88230f5091744cc005fba3c36509..95667bd4cd56c766c503e8543d8cc816feb996eb 100644 (file)
@@ -75,12 +75,14 @@ public:
     d_keepStaleData = keep;
   }
 
-
   void setECSParsingEnabled(bool enabled)
   {
     d_parseECS = enabled;
   }
 
+  void setMaximumEntrySize(size_t maxSize);
+  size_t getMaximumEntrySize() const { return d_maximumEntrySize; }
+
   uint32_t getKey(const DNSName::string_t& qname, size_t qnameWireLength, const PacketBuffer& packet, bool receivedOverUDP);
 
   static uint32_t getMinTTL(const char* packet, uint16_t length, bool* seenNoDataSOA);
@@ -139,15 +141,16 @@ private:
   pdns::stat_t d_ttlTooShorts{0};
   pdns::stat_t d_cleanupCount{0};
 
-  size_t d_maxEntries;
-  uint32_t d_shardCount;
-  uint32_t d_maxTTL;
-  uint32_t d_tempFailureTTL;
-  uint32_t d_maxNegativeTTL;
-  uint32_t d_minTTL;
-  uint32_t d_staleTTL;
-  bool d_dontAge;
-  bool d_deferrableInsertLock;
+  const size_t d_maxEntries;
+  size_t d_maximumEntrySize{4096};
+  const uint32_t d_shardCount;
+  const uint32_t d_maxTTL;
+  const uint32_t d_tempFailureTTL;
+  const uint32_t d_maxNegativeTTL;
+  const uint32_t d_minTTL;
+  const uint32_t d_staleTTL;
+  const bool d_dontAge;
+  const bool d_deferrableInsertLock;
   bool d_parseECS;
   bool d_keepStaleData{false};
 };
index 9ef56c62c1aa0730f57e265b85b48b5f0ae04e95..53cf1639761555dd546e8d43d90a8ebb90c69821 100644 (file)
@@ -1015,9 +1015,7 @@ void IncomingTCPConnectionState::handleIO()
             return;
           }
 
-          /* allocate a bit more memory to be able to spoof the content, get an answer from the cache
-             or to add ECS without allocating a new buffer */
-          d_buffer.resize(std::max(d_querySize + static_cast<size_t>(512), s_maxPacketCacheEntrySize));
+          d_buffer.resize(d_querySize);
           d_currentPos = 0;
         }
         else {
index b5f8cc0f3fa8377cc7aafd298971681959e0332d..dd4c96d33b0b48f3a486c0dae207393a655007f6 100644 (file)
@@ -153,7 +153,9 @@ uint32_t g_socketUDPRecvBuffer{0};
 
 std::set<std::string> g_capabilitiesToRetain;
 
-static size_t const s_initialUDPPacketBufferSize = s_maxPacketCacheEntrySize + DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE;
+// we are not willing to receive a bigger UDP response than that, no matter what
+static constexpr size_t s_maxUDPResponsePacketSize{4096U};
+static size_t const s_initialUDPPacketBufferSize = s_maxUDPResponsePacketSize + DNSCRYPT_MAX_RESPONSE_PADDING_AND_MAC_SIZE;
 static_assert(s_initialUDPPacketBufferSize <= UINT16_MAX, "Packet size should fit in a uint16_t");
 
 static ssize_t sendfromto(int sock, const void* data, size_t len, int flags, const ComboAddress& from, const ComboAddress& to)
@@ -550,7 +552,7 @@ bool processResponseAfterRules(PacketBuffer& response, const std::vector<DNSDist
     return false;
   }
 
-  if (dr.ids.packetCache && !dr.ids.selfGenerated && !dr.ids.skipCache && response.size() <= s_maxPacketCacheEntrySize) {
+  if (dr.ids.packetCache && !dr.ids.selfGenerated && !dr.ids.skipCache && (!dr.ids.forwardedOverUDP || response.size() <= s_maxUDPResponsePacketSize)) {
     if (!dr.ids.useZeroScope) {
       /* if the query was not suitable for zero-scope, for
          example because it had an existing ECS entry so the hash is
index 76c0ef8a160eb8b4decdc5851431291f78cc1e8f..da05c894a2ec3c2346f610e4e928cdba57862bd2 100644 (file)
@@ -1134,7 +1134,6 @@ extern bool g_addEDNSToSelfGeneratedResponses;
 
 extern std::set<std::string> g_capabilitiesToRetain;
 static const uint16_t s_udpIncomingBufferSize{1500}; // don't accept UDP queries larger than this value
-static const size_t s_maxPacketCacheEntrySize{4096}; // don't cache responses larger than this value
 
 enum class ProcessQueryResult : uint8_t { Drop, SendAnswer, PassToBackend, Asynchronous };
 ProcessQueryResult processQuery(DNSQuestion& dq, LocalHolders& holders, std::shared_ptr<DownstreamState>& selectedBackend);
index fd62eb5318ad784aefdd89a29b96c1f1fba83e2b..f71bf37516de58b4bbd36e8be4a0e9a62ae3cb07 100644 (file)
@@ -41,6 +41,7 @@ void setupLuaBindingsPacketCache(LuaContext& luaCtx, bool client)
     size_t maxNegativeTTL = 3600;
     size_t staleTTL = 60;
     size_t numberOfShards = 20;
+    size_t maxEntrySize{0};
     bool dontAge = false;
     bool deferrableInsertLock = true;
     bool ecsParsing = false;
@@ -59,6 +60,7 @@ void setupLuaBindingsPacketCache(LuaContext& luaCtx, bool client)
     getOptionalValue<size_t>(vars, "staleTTL", staleTTL);
     getOptionalValue<size_t>(vars, "temporaryFailureTTL", tempFailTTL);
     getOptionalValue<bool>(vars, "cookieHashing", cookieHashing);
+    getOptionalValue<size_t>(vars, "maximumEntrySize", maxEntrySize);
 
     if (getOptionalValue<decltype(skipOptions)>(vars, "skipOptions", skipOptions) > 0) {
       for (const auto& option : skipOptions) {
@@ -87,6 +89,9 @@ void setupLuaBindingsPacketCache(LuaContext& luaCtx, bool client)
 
     res->setKeepStaleData(keepStaleData);
     res->setSkippedOptions(optionsToSkip);
+    if (maxEntrySize >= sizeof(dnsheader)) {
+      res->setMaximumEntrySize(maxEntrySize);
+    }
 
     return res;
   });
index 81c87570b50bcada3ce5842cfd82c405fa2f0330..a165dc18cb1282dcff0d5fc0c3e0e6d0c223040d 100644 (file)
@@ -226,7 +226,7 @@ protected:
 class TCPConnectionToBackend : public ConnectionToBackend
 {
 public:
-  TCPConnectionToBackend(const std::shared_ptr<DownstreamState>& ds, std::unique_ptr<FDMultiplexer>& mplexer, const struct timeval& now, std::string&& /* proxyProtocolPayload*, unused but there to match the HTTP2 connections, so we can use the same templated connections manager class */): ConnectionToBackend(ds, mplexer, now), d_responseBuffer(s_maxPacketCacheEntrySize)
+  TCPConnectionToBackend(const std::shared_ptr<DownstreamState>& ds, std::unique_ptr<FDMultiplexer>& mplexer, const struct timeval& now, std::string&& /* proxyProtocolPayload*, unused but there to match the HTTP2 connections, so we can use the same templated connections manager class */): ConnectionToBackend(ds, mplexer, now), d_responseBuffer(512)
   {
   }
 
index 4c8c47323880b5fcac279457ec9ab5ee6cae6bf1..2c081fd27e93abd908216f132a6eb5f98a96e1d9 100644 (file)
@@ -30,7 +30,7 @@ public:
   enum class QueryProcessingResult : uint8_t { Forwarded, TooSmall, InvalidHeaders, Dropped, SelfAnswered, NoBackend, Asynchronous };
   enum class ProxyProtocolResult : uint8_t { Reading, Done, Error };
 
-  IncomingTCPConnectionState(ConnectionInfo&& ci, TCPClientThreadData& threadData, const struct timeval& now): d_buffer(s_maxPacketCacheEntrySize), d_ci(std::move(ci)), d_handler(d_ci.fd, timeval{g_tcpRecvTimeout,0}, d_ci.cs->tlsFrontend ? d_ci.cs->tlsFrontend->getContext() : (d_ci.cs->dohFrontend ? d_ci.cs->dohFrontend->d_tlsContext.getContext() : nullptr), now.tv_sec), d_connectionStartTime(now), d_ioState(make_unique<IOStateHandler>(*threadData.mplexer, d_ci.fd)), d_threadData(threadData), d_creatorThreadID(std::this_thread::get_id())
+  IncomingTCPConnectionState(ConnectionInfo&& ci, TCPClientThreadData& threadData, const struct timeval& now): d_buffer(sizeof(uint16_t)), d_ci(std::move(ci)), d_handler(d_ci.fd, timeval{g_tcpRecvTimeout,0}, d_ci.cs->tlsFrontend ? d_ci.cs->tlsFrontend->getContext() : (d_ci.cs->dohFrontend ? d_ci.cs->dohFrontend->d_tlsContext.getContext() : nullptr), now.tv_sec), d_connectionStartTime(now), d_ioState(make_unique<IOStateHandler>(*threadData.mplexer, d_ci.fd)), d_threadData(threadData), d_creatorThreadID(std::this_thread::get_id())
   {
     d_origDest.reset();
     d_origDest.sin4.sin_family = d_ci.remote.sin4.sin_family;
index 44f69a08966320d5d173364f1a6e633dce72f9e3..2d4f95927094035415a096c9715fc1f48dd7abd0 100644 (file)
@@ -916,6 +916,9 @@ See :doc:`../guides/cache` for a how to.
   .. versionchanged:: 1.7.0
     ``skipOptions`` parameter added.
 
+  .. versionchanged:: 1.9.0
+    ``maximumEntrySize`` parameter added.
+
   Creates a new :class:`PacketCache` with the settings specified.
 
   :param int maxEntries: The maximum number of entries in this cache
@@ -934,6 +937,7 @@ See :doc:`../guides/cache` for a how to.
   * ``temporaryFailureTTL=60``: int - On a SERVFAIL or REFUSED from the backend, cache for this amount of seconds..
   * ``cookieHashing=false``: bool - If true, EDNS Cookie values will be hashed, resulting in separate entries for different cookies in the packet cache. This is required if the backend is sending answers with EDNS Cookies, otherwise a client might receive an answer with the wrong cookie.
   * ``skipOptions={}``: Extra list of EDNS option codes to skip when hashing the packet (if ``cookieHashing`` above is false, EDNS cookie option number will be added to this list internally).
+  * ``maximumEntrySize=4096``: int - The maximum size, in bytes, of a DNS packet that can be inserted into the packet cache. Default is 4096 bytes, which was the fixed size before 1.9.0, and is also a hard limit for UDP responses.
 
 .. class:: PacketCache
 
index 14fceb06fac592b689835ce76ea024912e6d048d..bee70fc5cac80f13b03ac3a739a84b32510dd49f 100644 (file)
@@ -512,6 +512,155 @@ BOOST_AUTO_TEST_CASE(test_PacketCacheTruncated) {
   }
 }
 
+BOOST_AUTO_TEST_CASE(test_PacketCacheMaximumSize) {
+  const size_t maxEntries = 150000;
+  DNSDistPacketCache packetCache(maxEntries, 86400, 1);
+  InternalQueryState ids;
+  ids.qtype = QType::A;
+  ids.qclass = QClass::IN;
+  ids.protocol = dnsdist::Protocol::DoUDP;
+
+  ComboAddress remote;
+  bool dnssecOK = false;
+  ids.qname = DNSName("maximum.size");
+
+  PacketBuffer query;
+  uint16_t queryID{0};
+  {
+    GenericDNSPacketWriter<PacketBuffer> pwQ(query, ids.qname, QType::AAAA, QClass::IN, 0);
+    pwQ.getHeader()->rd = 1;
+    queryID = pwQ.getHeader()->id;
+  }
+
+  PacketBuffer response;
+  {
+    GenericDNSPacketWriter<PacketBuffer> pwR(response, ids.qname, QType::AAAA, QClass::IN, 0);
+    pwR.getHeader()->rd = 1;
+    pwR.getHeader()->ra = 1;
+    pwR.getHeader()->qr = 1;
+    pwR.getHeader()->id = queryID;
+    pwR.startRecord(ids.qname, QType::AAAA, 7200, QClass::IN, DNSResourceRecord::ANSWER);
+    ComboAddress v6("2001:db8::1");
+    pwR.xfrCAWithoutPort(6, v6);
+    pwR.commit();
+  }
+
+  /* first, we set the maximum entry size to the response packet size */
+  packetCache.setMaximumEntrySize(response.size());
+
+  {
+    /* UDP */
+    uint32_t key = 0;
+    boost::optional<Netmask> subnet;
+    DNSQuestion dq(ids, query);
+    bool found = packetCache.get(dq, 0, &key, subnet, dnssecOK, receivedOverUDP);
+    BOOST_CHECK_EQUAL(found, false);
+    BOOST_CHECK(!subnet);
+
+    packetCache.insert(key, subnet, *(getFlagsFromDNSHeader(dq.getHeader().get())), dnssecOK, ids.qname, QType::A, QClass::IN, response, receivedOverUDP, RCode::NoError, boost::none);
+    found = packetCache.get(dq, queryID, &key, subnet, dnssecOK, receivedOverUDP, 0, true);
+    BOOST_CHECK_EQUAL(found, true);
+    BOOST_CHECK(!subnet);
+  }
+
+  {
+    /* same but over TCP */
+    uint32_t key = 0;
+    boost::optional<Netmask> subnet;
+    ids.protocol = dnsdist::Protocol::DoTCP;
+    DNSQuestion dq(ids, query);
+    bool found = packetCache.get(dq, 0, &key, subnet, dnssecOK, !receivedOverUDP);
+    BOOST_CHECK_EQUAL(found, false);
+    BOOST_CHECK(!subnet);
+
+    packetCache.insert(key, subnet, *(getFlagsFromDNSHeader(dq.getHeader().get())), dnssecOK, ids.qname, QType::A, QClass::IN, response, !receivedOverUDP, RCode::NoError, boost::none);
+    found = packetCache.get(dq, queryID, &key, subnet, dnssecOK, !receivedOverUDP, 0, true);
+    BOOST_CHECK_EQUAL(found, true);
+    BOOST_CHECK(!subnet);
+  }
+
+  /* then we set it slightly below response packet size */
+  packetCache.expunge(0);
+  packetCache.setMaximumEntrySize(response.size() - 1);
+  {
+    /* UDP */
+    uint32_t key = 0;
+    boost::optional<Netmask> subnet;
+    DNSQuestion dq(ids, query);
+    bool found = packetCache.get(dq, 0, &key, subnet, dnssecOK, receivedOverUDP);
+    BOOST_CHECK_EQUAL(found, false);
+    BOOST_CHECK(!subnet);
+
+    packetCache.insert(key, subnet, *(getFlagsFromDNSHeader(dq.getHeader().get())), dnssecOK, ids.qname, QType::A, QClass::IN, response, receivedOverUDP, RCode::NoError, boost::none);
+    found = packetCache.get(dq, queryID, &key, subnet, dnssecOK, receivedOverUDP, 0, true);
+    BOOST_CHECK_EQUAL(found, false);
+  }
+
+  {
+    /* same but over TCP */
+    uint32_t key = 0;
+    boost::optional<Netmask> subnet;
+    ids.protocol = dnsdist::Protocol::DoTCP;
+    DNSQuestion dq(ids, query);
+    bool found = packetCache.get(dq, 0, &key, subnet, dnssecOK, !receivedOverUDP);
+    BOOST_CHECK_EQUAL(found, false);
+    BOOST_CHECK(!subnet);
+
+    packetCache.insert(key, subnet, *(getFlagsFromDNSHeader(dq.getHeader().get())), dnssecOK, ids.qname, QType::A, QClass::IN, response, !receivedOverUDP, RCode::NoError, boost::none);
+    found = packetCache.get(dq, queryID, &key, subnet, dnssecOK, !receivedOverUDP, 0, true);
+    BOOST_CHECK_EQUAL(found, false);
+  }
+
+  /* now we generate a very big response packet, it should be cached over TCP and UDP (although in practice dnsdist will refuse to cache it for the UDP case)  */
+  packetCache.expunge(0);
+  response.clear();
+  {
+    GenericDNSPacketWriter<PacketBuffer> pwR(response, ids.qname, QType::AAAA, QClass::IN, 0);
+    pwR.getHeader()->rd = 1;
+    pwR.getHeader()->ra = 1;
+    pwR.getHeader()->qr = 1;
+    pwR.getHeader()->id = queryID;
+    for (size_t idx = 0; idx < 1000; idx++) {
+      pwR.startRecord(ids.qname, QType::AAAA, 7200, QClass::IN, DNSResourceRecord::ANSWER);
+      ComboAddress v6("2001:db8::1");
+      pwR.xfrCAWithoutPort(6, v6);
+    }
+    pwR.commit();
+  }
+
+  BOOST_REQUIRE_GT(response.size(), 4096U);
+  packetCache.setMaximumEntrySize(response.size());
+
+  {
+    /* UDP */
+    uint32_t key = 0;
+    boost::optional<Netmask> subnet;
+    DNSQuestion dq(ids, query);
+    bool found = packetCache.get(dq, 0, &key, subnet, dnssecOK, receivedOverUDP);
+    BOOST_CHECK_EQUAL(found, false);
+    BOOST_CHECK(!subnet);
+
+    packetCache.insert(key, subnet, *(getFlagsFromDNSHeader(dq.getHeader().get())), dnssecOK, ids.qname, QType::A, QClass::IN, response, receivedOverUDP, RCode::NoError, boost::none);
+    found = packetCache.get(dq, queryID, &key, subnet, dnssecOK, receivedOverUDP, 0, true);
+    BOOST_CHECK_EQUAL(found, true);
+  }
+
+  {
+    /* same but over TCP */
+    uint32_t key = 0;
+    boost::optional<Netmask> subnet;
+    ids.protocol = dnsdist::Protocol::DoTCP;
+    DNSQuestion dq(ids, query);
+    bool found = packetCache.get(dq, 0, &key, subnet, dnssecOK, !receivedOverUDP);
+    BOOST_CHECK_EQUAL(found, false);
+    BOOST_CHECK(!subnet);
+
+    packetCache.insert(key, subnet, *(getFlagsFromDNSHeader(dq.getHeader().get())), dnssecOK, ids.qname, QType::A, QClass::IN, response, !receivedOverUDP, RCode::NoError, boost::none);
+    found = packetCache.get(dq, queryID, &key, subnet, dnssecOK, !receivedOverUDP, 0, true);
+    BOOST_CHECK_EQUAL(found, true);
+  }
+}
+
 static DNSDistPacketCache g_PC(500000);
 
 static void threadMangler(unsigned int offset)
index d7539836e8d50029aac2e0a41fda2edb33929e7b..7af2be9c9c171fbec391afcaa994ea84d3f3c093 100644 (file)
@@ -2916,3 +2916,71 @@ class TestAPICache(DNSDistTest):
         receivedQuery.id = query.id
         self.assertEqual(query, receivedQuery)
         self.assertEqual(receivedResponse, response)
+
+class TestCachingOfVeryLargeAnswers(DNSDistTest):
+
+    _config_template = """
+    pc = newPacketCache(100, {maxTTL=86400, minTTL=1, maximumEntrySize=8192})
+    getPool(""):setCache(pc)
+    newServer{address="127.0.0.1:%d"}
+    """
+
+    def testVeryLargeAnswer(self):
+        """
+        Cache: Check that we can cache (and retrieve) VERY large answers
+
+        We should be able to get answers as large as 8192 bytes this time
+        """
+        numberOfQueries = 10
+        name = 'very-large-answer.cache.tests.powerdns.com.'
+        query = dns.message.make_query(name, 'TXT', 'IN')
+        response = dns.message.make_response(query)
+        # we prepare a large answer
+        #         for i in range(44):
+        #             if len(content) > 0:
+        #                 content = content + ', '
+        #             content = content + (str(i)*50)
+        #         # pad up to 4096
+        #         content = content + 'A'*42
+        content = ''
+        for i in range(31):
+            if len(content) > 0:
+                content = content + ' '
+            content = content + 'A' * 255
+        # pad up to 8192
+        content = content + ' ' + 'B' * 183
+
+        rrset = dns.rrset.from_text(name,
+                                    3600,
+                                    dns.rdataclass.IN,
+                                    dns.rdatatype.TXT,
+                                    content)
+        response.answer.append(rrset)
+        self.assertEqual(len(response.to_wire()), 8192)
+
+        # # first query to fill the cache, over TCP
+        (receivedQuery, receivedResponse) = self.sendTCPQuery(query, response)
+        self.assertTrue(receivedQuery)
+        self.assertTrue(receivedResponse)
+        receivedQuery.id = query.id
+        self.assertEqual(query, receivedQuery)
+        self.assertEqual(receivedResponse, response)
+
+        for _ in range(numberOfQueries):
+            (_, receivedResponse) = self.sendTCPQuery(query, response=None, useQueue=False)
+            self.assertEqual(receivedResponse, response)
+
+        total = 0
+        for key in self._responsesCounter:
+            total += self._responsesCounter[key]
+            TestCachingOfVeryLargeAnswers._responsesCounter[key] = 0
+
+        self.assertEqual(total, 1)
+
+        # UDP should not be cached, dnsdist has a hard limit to 4096 bytes for UDP
+        # actually we will never get an answer, because dnsdist will not be able to get it from the backend
+        (receivedQuery, receivedResponse) = self.sendUDPQuery(query, response)
+        self.assertTrue(receivedQuery)
+        self.assertFalse(receivedResponse)
+        receivedQuery.id = query.id
+        self.assertEqual(query, receivedQuery)