From: Otto Moerbeek
Date: Tue, 3 Jan 2023 10:36:54 +0000 (+0100)
Subject: Take shard size and number of remaining shards into account when cleaning, so that...
X-Git-Tag: dnsdist-1.8.0-rc1~137^2~1
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5dfa679e323b3bd0e40a60fb4d4e8e6916d8e619;p=thirdparty%2Fpdns.git

Take shard size and the number of remaining shards into account when cleaning, so that big shards get pruned more, and so that we get more eager if we have fallen behind due to rounding etc.
---

diff --git a/pdns/cachecleaner.hh b/pdns/cachecleaner.hh
index f489a9d88f..dc6e0f0f7e 100644
--- a/pdns/cachecleaner.hh
+++ b/pdns/cachecleaner.hh
@@ -131,34 +131,39 @@ uint64_t pruneLockedCollectionsVector(std::vector& maps)
 template 
 uint64_t pruneMutexCollectionsVector(C& container, std::vector& maps, uint64_t maxCached, uint64_t cacheSize)
 {
-  time_t now = time(nullptr);
+  const time_t now = time(nullptr);
   uint64_t totErased = 0;
   uint64_t toTrim = 0;
   uint64_t lookAt = 0;
 
   // two modes - if toTrim is 0, just look through 10% of the cache and nuke everything that is expired
-  // otherwise, scan first 5*toTrim records, and stop once we've nuked enough
+  // otherwise, scan first max(5*toTrim, 10%) records, and stop once we've nuked enough
   if (cacheSize > maxCached) {
     toTrim = cacheSize - maxCached;
-    lookAt = 5 * toTrim;
+    lookAt = std::max(5 * toTrim, cacheSize / 10);
   }
   else {
     lookAt = cacheSize / 10;
   }
 
-  uint64_t maps_size = maps.size();
-  if (maps_size == 0) {
+  const uint64_t numberOfShards = maps.size();
+  if (numberOfShards == 0 || cacheSize == 0) {
     return 0;
   }
 
+  // first we scan a fraction of the shards for expired entries ordered by LRU
   for (auto& content : maps) {
-    auto mc = content.lock();
-    mc->invalidate();
-    auto& sidx = boost::multi_index::get(mc->d_map);
-    uint64_t erased = 0, lookedAt = 0;
+    auto shard = content.lock();
+    const auto shardSize = shard->d_map.size();
+    const uint64_t toScanForThisShard = std::ceil(lookAt * ((1.0 * shardSize) / cacheSize));
+    const uint64_t toTrimForThisShard = toTrim > 0 ? std::ceil(toTrim * ((1.0 * shardSize) / cacheSize)) : 0;
+    shard->invalidate();
+    auto& sidx = boost::multi_index::get(shard->d_map);
+    uint64_t erased = 0;
+    uint64_t lookedAt = 0;
     for (auto i = sidx.begin(); i != sidx.end(); lookedAt++) {
       if (i->isStale(now)) {
-        container.preRemoval(*mc, *i);
+        container.preRemoval(*shard, *i);
         i = sidx.erase(i);
         erased++;
         --content.d_entriesCount;
@@ -167,15 +172,15 @@ uint64_t pruneMutexCollectionsVector(C& container, std::vector& maps, uint64_
         ++i;
       }
 
-      if (toTrim && erased >= toTrim / maps_size)
+      if (toTrim > 0 && erased >= toTrimForThisShard) {
         break;
+      }
 
-      if (lookedAt > lookAt / maps_size)
+      if (lookedAt >= toScanForThisShard) {
         break;
+      }
     }
     totErased += erased;
-    if (toTrim && totErased >= toTrim)
-      break;
   }
 
   if (totErased >= toTrim) {
     // done
@@ -184,26 +189,53 @@ uint64_t pruneMutexCollectionsVector(C& container, std::vector& maps, uint64_
 
   toTrim -= totErased;
 
-  while (true) {
-    size_t pershard = toTrim / maps_size + 1;
-    for (auto& content : maps) {
-      auto mc = content.lock();
-      mc->invalidate();
-      auto& sidx = boost::multi_index::get(mc->d_map);
-      size_t removed = 0;
-      for (auto i = sidx.begin(); i != sidx.end() && removed < pershard; removed++) {
-        container.preRemoval(*mc, *i);
-        i = sidx.erase(i);
-        --content.d_entriesCount;
-        totErased++;
-        toTrim--;
-        if (toTrim == 0) {
-          return totErased;
-        }
+  // It was not enough, so we need to remove entries that are not
+  // expired, still using the LRU index.
+
+  // From here on cacheSize is the total size of the shards that still
+  // need to be cleaned. When a shard is processed, we subtract its
+  // original size from cacheSize as it is used to compute the
+  // fraction of the next shards to clean. This way rounding issues do
+  // not cause over- or undershoot of the target.
+  //
+  // Suppose we have 10 perfectly balanced shards, each filled with
+  // 100 entries. So cacheSize is 1000. When cleaning 10%, after shard
+  // 0 we still need to process 900 entries, spread over 9
+  // shards. So cacheSize becomes 900, and toTrim 90, since we cleaned
+  // 10 items from shard 0. Our fraction remains 10%. For the last
+  // shard, we would end up with cacheSize 100, and 10 to clean.
+  //
+  // When the balance is not perfect, e.g. shard 0 has 54 entries, we
+  // would clean 5 entries due to rounding, and for the remaining
+  // shards we start with cacheSize 946 and toTrim 95: the fraction
+  // becomes slightly larger than 10%, since we "missed" one item in
+  // shard 0.
+
+  cacheSize -= totErased;
+
+  for (auto& content : maps) {
+    auto shard = content.lock();
+    const auto shardSize = shard->d_map.size();
+
+    const uint64_t toTrimForThisShard = std::round(static_cast(toTrim) * shardSize / cacheSize);
+    // See explanation above
+    cacheSize -= shardSize;
+    if (toTrimForThisShard == 0) {
+      continue;
+    }
+    shard->invalidate();
+    auto& sidx = boost::multi_index::get(shard->d_map);
+    size_t removed = 0;
+    for (auto i = sidx.begin(); i != sidx.end() && removed < toTrimForThisShard; removed++) {
+      container.preRemoval(*shard, *i);
+      i = sidx.erase(i);
+      --content.d_entriesCount;
+      ++totErased;
+      if (--toTrim == 0) {
+        return totErased;
       }
     }
   }
-  // Not reached
   return totErased;
 }
diff --git a/pdns/recursordist/negcache.cc b/pdns/recursordist/negcache.cc
index 9370c531cb..3097ec0042 100644
--- a/pdns/recursordist/negcache.cc
+++ b/pdns/recursordist/negcache.cc
@@ -31,7 +31,7 @@ uint16_t NegCache::s_maxServedStaleExtensions;
 
 NegCache::NegCache(size_t mapsCount) :
-  d_maps(mapsCount)
+  d_maps(mapsCount == 0 ? 1 : mapsCount)
 {
 }
 
@@ -258,10 +258,7 @@ void NegCache::clear()
 void NegCache::prune(size_t maxEntries)
 {
   size_t cacheSize = size();
-  cerr << "======= NegCache =======" << endl;
   pruneMutexCollectionsVector(*this, d_maps, maxEntries, cacheSize);
-  cerr << "NegCache size is now " << size() << endl;
-  cerr << "========================" << endl;
 }
 
 /*!
@@ -292,6 +289,7 @@ size_t NegCache::doDump(int fd, size_t maxCacheEntries)
   for (auto& mc : d_maps) {
     auto m = mc.lock();
     const auto shardSize = m->d_map.size();
+    fprintf(fp.get(), "; negcache shard %zu; size %zu\n", shard, shardSize);
     min = std::min(min, shardSize);
     max = std::max(max, shardSize);
     shard++;
diff --git a/pdns/recursordist/recursor_cache.cc b/pdns/recursordist/recursor_cache.cc
index 3796c70f44..2bf5d556e9 100644
--- a/pdns/recursordist/recursor_cache.cc
+++ b/pdns/recursordist/recursor_cache.cc
@@ -55,7 +55,7 @@ uint16_t MemRecursorCache::s_maxServedStaleExtensions;
 
 MemRecursorCache::MemRecursorCache(size_t mapsCount) :
-  d_maps(mapsCount)
+  d_maps(mapsCount == 0 ? 1 : mapsCount)
 {
 }
 
@@ -781,6 +781,7 @@ uint64_t MemRecursorCache::doDump(int fd, size_t maxCacheEntries)
   for (auto& mc : d_maps) {
     auto map = mc.lock();
     const auto shardSize = map->d_map.size();
+    fprintf(fp.get(), "; record cache shard %zu; size %zu\n", shard, shardSize);
     min = std::min(min, shardSize);
     max = std::max(max, shardSize);
     shard++;
@@ -814,10 +815,7 @@ uint64_t MemRecursorCache::doDump(int fd, size_t maxCacheEntries)
 void MemRecursorCache::doPrune(size_t keep)
 {
   size_t cacheSize = size();
-  cerr << "=====-Cache=============" << endl;
   pruneMutexCollectionsVector(*this, d_maps, keep, cacheSize);
-  cerr << "Size is now " << size() << endl;
-  cerr << "========================" << endl;
 }
 
 namespace boost
diff --git a/pdns/recursordist/test-negcache_cc.cc b/pdns/recursordist/test-negcache_cc.cc
index 4bbce4abab..3306df1da8 100644
--- a/pdns/recursordist/test-negcache_cc.cc
+++ b/pdns/recursordist/test-negcache_cc.cc
@@ -282,6 +282,28 @@ BOOST_AUTO_TEST_CASE(test_prune)
   struct timeval now;
   Utility::gettimeofday(&now, 0);
 
+  NegCache cache(1);
+  NegCache::NegCacheEntry ne;
+  for (int i = 0; i < 400; i++) {
+    ne = genNegCacheEntry(DNSName(std::to_string(i) + qname), auth, now);
+    cache.add(ne);
+  }
+
+  BOOST_CHECK_EQUAL(cache.size(), 400U);
+
+  cache.prune(100);
+
+  BOOST_CHECK_EQUAL(cache.size(), 100U);
+}
+
+BOOST_AUTO_TEST_CASE(test_prune_many_shards)
+{
+  string qname(".powerdns.com");
+  DNSName auth("powerdns.com");
+
+  struct timeval now;
+  Utility::gettimeofday(&now, 0);
+
   NegCache cache;
   NegCache::NegCacheEntry ne;
   for (int i = 0; i < 400; i++) {
@@ -443,6 +465,7 @@ BOOST_AUTO_TEST_CASE(test_dumpToFile)
   vector expected = {
     "; negcache dump follows\n",
     ";\n",
+    "; negcache shard 0; size 2\n",
     "www1.powerdns.com. 600 IN TYPE0 VIA powerdns.com. ; (Indeterminate) origttl=600 ss=0\n",
     "powerdns.com. 600 IN SOA ns1. hostmaster. 1 2 3 4 5 ; (Indeterminate)\n",
     "powerdns.com. 600 IN RRSIG SOA 5 3 600 20370101000000 20370101000000 24567 dummy. data ;\n",
@@ -453,8 +476,7 @@ BOOST_AUTO_TEST_CASE(test_dumpToFile)
     "powerdns.com. 600 IN RRSIG SOA 5 3 600 20370101000000 20370101000000 24567 dummy. data ;\n",
     "powerdns.com. 600 IN NSEC deadbeef. ; (Indeterminate)\n",
     "powerdns.com. 600 IN RRSIG NSEC 5 3 600 20370101000000 20370101000000 24567 dummy. data ;\n",
-    "; negcache size: 2/0 shards: 1 min/max shard size: 2/2\n"
-  };
+    "; negcache size: 2/0 shards: 1 min/max shard size: 2/2\n"};
 
   struct timeval now;
   Utility::gettimeofday(&now, 0);
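
The heart of the patch is the second pruning pass: each shard gives up a share of the removal budget proportional to its size, and because cacheSize is reduced by the size of every shard already handled, rounding drift from earlier shards is absorbed by the shards that follow. Below is a standalone C++ sketch of just that bookkeeping; it is not part of the patch, and planTrim(), the shard layout and the main() driver are made up for illustration. It replays the unbalanced 54-entry example from the comment in the patch.

// prune_budget_sketch.cc -- standalone illustration, not part of the patch.
// Build with e.g.: g++ -std=c++17 prune_budget_sketch.cc -o prune_budget_sketch
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// Distribute a global removal budget (toTrim) over shards in proportion to
// their size. cacheSize starts as the sum of all shard sizes and shrinks by
// each processed shard's size, so rounding drift in earlier shards is
// compensated by the shards that still have to be visited -- the same
// bookkeeping the second pruning pass above performs.
static std::vector<uint64_t> planTrim(const std::vector<uint64_t>& shardSizes, uint64_t toTrim)
{
  uint64_t cacheSize = 0;
  for (auto size : shardSizes) {
    cacheSize += size;
  }

  std::vector<uint64_t> plan;
  plan.reserve(shardSizes.size());

  for (auto shardSize : shardSizes) {
    uint64_t forThisShard = 0;
    if (toTrim > 0 && cacheSize > 0) {
      forThisShard = static_cast<uint64_t>(std::round(static_cast<double>(toTrim) * shardSize / cacheSize));
      // defensive clamp: never plan more than the shard holds or the budget allows
      forThisShard = std::min({forThisShard, shardSize, toTrim});
    }
    plan.push_back(forThisShard);
    toTrim -= forThisShard;  // remaining global budget
    cacheSize -= shardSize;  // entries left in the shards still to visit
  }
  return plan;
}

int main()
{
  // The unbalanced example from the comment in the patch: shard 0 holds 54
  // entries, the other nine shards hold the remaining 946 of a 1000-entry
  // cache, and we want to trim 10%.
  const std::vector<uint64_t> shards = {54, 106, 105, 105, 105, 105, 105, 105, 105, 105};
  const auto plan = planTrim(shards, 100);

  uint64_t total = 0;
  for (size_t i = 0; i < plan.size(); ++i) {
    std::cout << "shard " << i << ": remove " << plan[i] << " of " << shards[i] << '\n';
    total += plan[i];
  }
  std::cout << "total removed: " << total << '\n'; // lands exactly on 100
  return 0;
}

Individual shards may give up one entry more or fewer than an exact 10% because of std::round, but the shrinking denominator keeps the per-shard budgets summing to exactly the requested total, so no second catch-up round over the shards is needed.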