From: Otto Moerbeek <otto.moerbeek@open-xchange.com>
Date: Mon, 28 Oct 2024 09:52:56 +0000 (+0100)
Subject: Allow zero for no limit in getRecordCacheRecords()
X-Git-Tag: rec-5.2.0-alpha1~12^2~2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c816aab673de0efce23d7fc5a34ab3cdd22c3c1e;p=thirdparty%2Fpdns.git

Allow zero for no limit in getRecordCacheRecords()

Also better distinction between the term record and record set in code and docs
---

diff --git a/pdns/recursordist/docs/lua-scripting/functions.rst b/pdns/recursordist/docs/lua-scripting/functions.rst
index af1ad09e44..a70c503c35 100644
--- a/pdns/recursordist/docs/lua-scripting/functions.rst
+++ b/pdns/recursordist/docs/lua-scripting/functions.rst
@@ -42,7 +42,7 @@ These are some functions that don't really have a place in one of the other cate
    :param str dump: The data in the proprietary format produced by :func:`getRecordCacheRecords`).
    :returns: The number of records inserted into the record cache.
 
-   Some records might be skipped, for example when they are already present in the record cache or contain specific information not supported yet by this function.
+   Some record sets might be skipped, for example when they are already present in the record cache or contain specific information not supported yet by this function.
    If the :program:`Recursor` determines the version of the data is not compatible, it will skip loading and log an error.
 
 .. function:: getRecordCacheRecords(perShard, maxSize) -> str, int
@@ -51,13 +51,16 @@ These are some functions that don't really have a place in one of the other cate
 
    Get a record cache dump in proprietary format.
 
-   :param int perShard: The maximum number of records to retrieve per shard.
-   :param int maxSize: The maximum size of the dump.
+   :param int perShard: The maximum number of record sets to retrieve per shard. Zero is unlimited.
+   :param int maxSize: The maximum size of the dump. Zero is unlimited.
 
-   :return: A string representing the records and an integer specifying how many records were retrieved
+   :return: A string representing the record sets and an integer specifying how many record sets were retrieved
 
-   This function will scan the most recently used records of each shard, picking at most ``perShard`` records per shard and adding them to the result.
-   If adding a record's data to the result would make the result size exceed ``maxSize``, the remainder of the current shard and further remaining shards are skipped.
+   This function will scan the most recently used record sets of each shard, picking at most ``perShard`` record sets per shard and adding them to the result.
+   If adding a record set's data to the result would make the result size exceed ``maxSize``, the remainder of the current shard and further remaining shards are skipped.
    The format of the string produced is proprietary.
    The string contains meta information, so the :program:`Recursor` calling :func:`putIntoRecordCache` can check if the data format is compatible.
 
+   Note that setting both limits to zero can produce very large strings. It is wise to set at least one of the limits.
+   Additionally, setting ``maxSize`` to zero can lead to less efficient memory management while producing the dump.
+
diff --git a/pdns/recursordist/lua-recursor4.cc b/pdns/recursordist/lua-recursor4.cc
index 2b4b5887cd..3ad86491ef 100644
--- a/pdns/recursordist/lua-recursor4.cc
+++ b/pdns/recursordist/lua-recursor4.cc
@@ -496,12 +496,12 @@ void RecursorLua4::postPrepareContext() // NOLINT(readability-function-cognitive
 
   d_lw->writeFunction("getRecordCacheRecords", [](size_t perShard, size_t maxSize) {
     std::string ret;
-    auto number = g_recCache->getRecords(perShard, maxSize, ret);
+    auto number = g_recCache->getRecordSets(perShard, maxSize, ret);
     return std::tuple<std::string, size_t>{ret, number};
   });
 
   d_lw->writeFunction("putIntoRecordCache", [](const string& data) {
-    return g_recCache->putRecords(data);
+    return g_recCache->putRecordSets(data);
   });
 
   d_lw->writeFunction("spawnThread", [](const string& scriptName) {
diff --git a/pdns/recursordist/recursor_cache.cc b/pdns/recursordist/recursor_cache.cc
index 9bfdb010fd..72aa20c7d8 100644
--- a/pdns/recursordist/recursor_cache.cc
+++ b/pdns/recursordist/recursor_cache.cc
@@ -1011,7 +1011,7 @@ static void decodeNetmask(protozero::pbf_message<T>& message, Netmask& subnet)
 }
 
 template <typename T, typename U>
-void MemRecursorCache::getRecord(T& message, U recordSet)
+void MemRecursorCache::getRecordSet(T& message, U recordSet)
 {
   // Two fields below must come before the other fields
   message.add_bytes(PBCacheEntry::required_bytes_name, recordSet->d_qname.toString());
@@ -1047,11 +1047,21 @@ void MemRecursorCache::getRecord(T& message, U recordSet)
   message.add_bool(PBCacheEntry::required_bool_tooBig, recordSet->d_tooBig);
 }
 
-size_t MemRecursorCache::getRecords(size_t perShard, size_t maxSize, std::string& ret)
+size_t MemRecursorCache::getRecordSets(size_t perShard, size_t maxSize, std::string& ret)
 {
   auto log = g_slog->withName("recordcache")->withValues("perShard", Logging::Loggable(perShard), "maxSize", Logging::Loggable(maxSize));
   log->info(Logr::Info, "Producing cache dump");
 
+  // A size estmate is hard: size() returns the number of record *sets*. Each record set can have
+  // multiple records, plus other associated records like signatures. 150 seems to works ok.
+  size_t estimate = maxSize == 0 ? size() * 150 : maxSize + 4096; // We may overshoot (will be rolled back)
+
+  if (perShard == 0) {
+    perShard = std::numeric_limits<size_t>::max();
+  }
+  if (maxSize == 0) {
+    maxSize = std::numeric_limits<size_t>::max();
+  }
   protozero::pbf_builder<PBCacheDump> full(ret);
   full.add_string(PBCacheDump::required_string_version, getPDNSVersion());
   full.add_string(PBCacheDump::required_string_identity, SyncRes::s_serverID);
@@ -1060,7 +1070,7 @@ size_t MemRecursorCache::getRecords(size_t perShard, size_t maxSize, std::string
   full.add_string(PBCacheDump::required_string_type, "PBCacheDump");
 
   size_t count = 0;
-  ret.reserve(maxSize + 4096); // We may overshoot (will be rolled back)
+  ret.reserve(estimate);
 
   for (auto& shard : d_maps) {
     auto lockedShard = shard.lock();
@@ -1068,7 +1078,7 @@ size_t MemRecursorCache::getRecords(size_t perShard, size_t maxSize, std::string
     size_t thisShardCount = 0;
     for (auto recordSet = sidx.rbegin(); recordSet != sidx.rend(); ++recordSet) {
       protozero::pbf_builder<PBCacheEntry> message(full, PBCacheDump::repeated_message_cacheEntry);
-      getRecord(message, recordSet);
+      getRecordSet(message, recordSet);
       if (ret.size() > maxSize) {
         message.rollback();
         log->info(Logr::Info, "Produced cache dump (max size reached)", "size", Logging::Loggable(ret.size()), "count", Logging::Loggable(count));
@@ -1122,7 +1132,7 @@ static void putAuthRecord(protozero::pbf_message<PBCacheEntry>& message, const D
 }
 
 template <typename T>
-bool MemRecursorCache::putRecord(T& message)
+bool MemRecursorCache::putRecordSet(T& message)
 {
   CacheEntry cacheEntry{{g_rootdnsname, QType::A, boost::none, Netmask()}, false};
   while (message.next()) {
@@ -1188,7 +1198,7 @@ bool MemRecursorCache::putRecord(T& message)
   return replace(std::move(cacheEntry));
 }
 
-size_t MemRecursorCache::putRecords(const std::string& pbuf)
+size_t MemRecursorCache::putRecordSets(const std::string& pbuf)
 {
   auto log = g_slog->withName("recordcache")->withValues("size", Logging::Loggable(pbuf.size()));
   log->info(Logr::Debug, "Processing cache dump");
@@ -1231,7 +1241,7 @@ size_t MemRecursorCache::putRecords(const std::string& pbuf)
       }
       case PBCacheDump::repeated_message_cacheEntry: {
         protozero::pbf_message<PBCacheEntry> message = full.get_message();
-        if (putRecord(message)) {
+        if (putRecordSet(message)) {
           ++inserted;
         }
         ++count;
diff --git a/pdns/recursordist/recursor_cache.hh b/pdns/recursordist/recursor_cache.hh
index 596eeb85ad..5941315827 100644
--- a/pdns/recursordist/recursor_cache.hh
+++ b/pdns/recursordist/recursor_cache.hh
@@ -62,8 +62,8 @@ public:
   [[nodiscard]] pair<uint64_t, uint64_t> stats();
   [[nodiscard]] size_t ecsIndexSize();
 
-  size_t getRecords(size_t perShard, size_t maxSize, std::string& ret);
-  size_t putRecords(const std::string& pbuf);
+  size_t getRecordSets(size_t perShard, size_t maxSize, std::string& ret);
+  size_t putRecordSets(const std::string& pbuf);
 
   using OptTag = boost::optional<std::string>;
 
@@ -154,9 +154,9 @@ private:
   bool replace(CacheEntry&& entry);
   // Using templates to avoid exposing protozero types in this header file
   template <typename T>
-  bool putRecord(T&);
+  bool putRecordSet(T&);
   template <typename T, typename U>
-  void getRecord(T&, U);
+  void getRecordSet(T&, U);
 
   /* The ECS Index (d_ecsIndex) keeps track of whether there is any ECS-specific
      entry for a given (qname,qtype) entry in the cache (d_map), and if so
diff --git a/pdns/recursordist/test-recursorcache_cc.cc b/pdns/recursordist/test-recursorcache_cc.cc
index 8cdb545f6c..2404060622 100644
--- a/pdns/recursordist/test-recursorcache_cc.cc
+++ b/pdns/recursordist/test-recursorcache_cc.cc
@@ -1368,10 +1368,10 @@ BOOST_AUTO_TEST_CASE(test_RecursorCacheDumpAndRestore)
     checker();
 
     std::string dump;
-    MRC.getRecords(std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::max(), dump);
+    MRC.getRecordSets(0, 0, dump);
     MRC.doWipeCache(DNSName("."), true);
     BOOST_CHECK_EQUAL(MRC.size(), 0U);
-    size_t inserted = MRC.putRecords(dump);
+    size_t inserted = MRC.putRecordSets(dump);
     BOOST_CHECK_EQUAL(inserted, 100U);
     BOOST_CHECK_EQUAL(MRC.size(), 100U);