Take shard size and number of remaining shards into account when cleaning, so that...

author Otto Moerbeek <otto.moerbeek@open-xchange.com>

Tue, 3 Jan 2023 10:36:54 +0000 (11:36 +0100)

committer Otto Moerbeek <otto.moerbeek@open-xchange.com>

Wed, 4 Jan 2023 10:13:18 +0000 (11:13 +0100)
author Otto Moerbeek <otto.moerbeek@open-xchange.com>
Tue, 3 Jan 2023 10:36:54 +0000 (11:36 +0100)
committer Otto Moerbeek <otto.moerbeek@open-xchange.com>
Wed, 4 Jan 2023 10:13:18 +0000 (11:13 +0100)
diff --git a/pdns/cachecleaner.hh b/pdns/cachecleaner.hh

index f489a9d88f437face61e3528bd4875cf94276919..dc6e0f0f7eebe517519e1dcb7f1b1c67a7822801 100644 (file)
--- a/pdns/cachecleaner.hh
+++ b/pdns/cachecleaner.hh
@@ -131,34 +131,39 @@ uint64_t pruneLockedCollectionsVector(std::vector<T>& maps)
  template <typename S, typename C, typename T>
  uint64_t pruneMutexCollectionsVector(C& container, std::vector<T>& maps, uint64_t maxCached, uint64_t cacheSize)
  {
-  time_t now = time(nullptr);
+  const time_t now = time(nullptr);
    uint64_t totErased = 0;
    uint64_t toTrim = 0;
    uint64_t lookAt = 0;
  
    // two modes - if toTrim is 0, just look through 10%  of the cache and nuke everything that is expired
-  // otherwise, scan first 5*toTrim records, and stop once we've nuked enough
+  // otherwise, scan first max(5*toTrim, 10%) records, and stop once we've nuked enough
    if (cacheSize > maxCached) {
      toTrim = cacheSize - maxCached;
-    lookAt = 5 * toTrim;
+    lookAt = std::max(5 * toTrim, cacheSize / 10);
    }
    else {
      lookAt = cacheSize / 10;
    }
  
-  uint64_t maps_size = maps.size();
-  if (maps_size == 0) {
+  const uint64_t numberOfShards = maps.size();
+  if (numberOfShards == 0 || cacheSize == 0) {
      return 0;
    }
  
+  // first we scan a fraction of the shards for expired entries orderded by LRU
    for (auto& content : maps) {
-    auto mc = content.lock();
-    mc->invalidate();
-    auto& sidx = boost::multi_index::get<S>(mc->d_map);
-    uint64_t erased = 0, lookedAt = 0;
+    auto shard = content.lock();
+    const auto shardSize = shard->d_map.size();
+    const uint64_t toScanForThisShard = std::ceil(lookAt * ((1.0 * shardSize) / cacheSize));
+    const uint64_t toTrimForThisShard = toTrim > 0 ? std::ceil(toTrim * ((1.0 * shardSize) / cacheSize)) : 0;
+    shard->invalidate();
+    auto& sidx = boost::multi_index::get<S>(shard->d_map);
+    uint64_t erased = 0;
+    uint64_t lookedAt = 0;
      for (auto i = sidx.begin(); i != sidx.end(); lookedAt++) {
        if (i->isStale(now)) {
-        container.preRemoval(*mc, *i);
+        container.preRemoval(*shard, *i);
          i = sidx.erase(i);
          erased++;
          --content.d_entriesCount;
@@ -167,15 +172,15 @@ uint64_t pruneMutexCollectionsVector(C& container, std::vector<T>& maps, uint64_
          ++i;
        }
  
-      if (toTrim && erased >= toTrim / maps_size)
+      if (toTrim > 0 && erased >= toTrimForThisShard) {
          break;
+      }
  
-      if (lookedAt > lookAt / maps_size)
+      if (lookedAt >= toScanForThisShard) {
          break;
+      }
      }
      totErased += erased;
-    if (toTrim && totErased >= toTrim)
-      break;
    }
  
    if (totErased >= toTrim) { // done
@@ -184,26 +189,53 @@ uint64_t pruneMutexCollectionsVector(C& container, std::vector<T>& maps, uint64_
  
    toTrim -= totErased;
  
-  while (true) {
-    size_t pershard = toTrim / maps_size + 1;
-    for (auto& content : maps) {
-      auto mc = content.lock();
-      mc->invalidate();
-      auto& sidx = boost::multi_index::get<S>(mc->d_map);
-      size_t removed = 0;
-      for (auto i = sidx.begin(); i != sidx.end() && removed < pershard; removed++) {
-        container.preRemoval(*mc, *i);
-        i = sidx.erase(i);
-        --content.d_entriesCount;
-        totErased++;
-        toTrim--;
-        if (toTrim == 0) {
-          return totErased;
-        }
+  // It was not enough, so we need to remove entries that are not
+  // expired, still using the LRU index.
+
+  // From here on cacheSize is the total size of the shards that still
+  // need to be cleaned. When a shard is processed, we subtract its
+  // original size from cacheSize as it used to compute the
+  // fraction of the next shards to clean. This way rounding issues do
+  // not cause over or undershoot of the target.
+  //
+  // Suppose we have 10 perfectly balanced shards, each filled with
+  // 100 entries. So cacheSize is 1000. When cleaning 10%, after shard
+  // 0 we still need to processs 900 entries, spread out of 9
+  // shards. So cacheSize becomes 900, and toTrim 90, since we cleaned
+  // 10 items from shard 0. Our fraction remains 10%. For the last
+  // shard, we would end up with cacheSize 100, and to clean 10.
+  //
+  // When the balance is not perfect, e.g. shard 0 has 54 entries, we
+  // would clean 5 entries due to rounding, and for the remaning
+  // shards we start with cacheSize 946 and toTrim 95: the fraction
+  // becomes slightly larger than 10%, since we "missed" one item in
+  // shard 0.
+
+  cacheSize -= totErased;
+
+  for (auto& content : maps) {
+    auto shard = content.lock();
+    const auto shardSize = shard->d_map.size();
+
+    const uint64_t toTrimForThisShard = std::round(static_cast<double>(toTrim) * shardSize / cacheSize);
+    // See explanation above
+    cacheSize -= shardSize;
+    if (toTrimForThisShard == 0) {
+      continue;
+    }
+    shard->invalidate();
+    auto& sidx = boost::multi_index::get<S>(shard->d_map);
+    size_t removed = 0;
+    for (auto i = sidx.begin(); i != sidx.end() && removed < toTrimForThisShard; removed++) {
+      container.preRemoval(*shard, *i);
+      i = sidx.erase(i);
+      --content.d_entriesCount;
+      ++totErased;
+      if (--toTrim == 0) {
+        return totErased;
        }
      }
    }
-  // Not reached
    return totErased;
  }
  
diff --git a/pdns/recursordist/negcache.cc b/pdns/recursordist/negcache.cc

index 9370c531cba1af93716406aeb1d700374af728f6..3097ec0042418ff67125d8bad4e5f95c5bc757e7 100644 (file)
--- a/pdns/recursordist/negcache.cc
+++ b/pdns/recursordist/negcache.cc
@@ -31,7 +31,7 @@
  uint16_t NegCache::s_maxServedStaleExtensions;
  
  NegCache::NegCache(size_t mapsCount) :
-  d_maps(mapsCount)
+  d_maps(mapsCount == 0 ? 1 : mapsCount)
  {
  }
  
@@ -258,10 +258,7 @@ void NegCache::clear()
  void NegCache::prune(size_t maxEntries)
  {
    size_t cacheSize = size();
-  cerr << "======= NegCache =======" << endl;
    pruneMutexCollectionsVector<SequenceTag>(*this, d_maps, maxEntries, cacheSize);
-  cerr << "NegCache size is now " << size() << endl;
-  cerr << "========================" << endl;
  }
  
  /*!
@@ -292,6 +289,7 @@ size_t NegCache::doDump(int fd, size_t maxCacheEntries)
    for (auto& mc : d_maps) {
      auto m = mc.lock();
      const auto shardSize = m->d_map.size();
+    fprintf(fp.get(), "; negcache shard %zu; size %zu\n", shard, shardSize);
      min = std::min(min, shardSize);
      max = std::max(max, shardSize);
      shard++;
diff --git a/pdns/recursordist/recursor_cache.cc b/pdns/recursordist/recursor_cache.cc

index 3796c70f4408ea2041b9cc282bcca47c71684149..2bf5d556e999149ac8419db312aece30ccae5d42 100644 (file)
--- a/pdns/recursordist/recursor_cache.cc
+++ b/pdns/recursordist/recursor_cache.cc
@@ -55,7 +55,7 @@
  uint16_t MemRecursorCache::s_maxServedStaleExtensions;
  
  MemRecursorCache::MemRecursorCache(size_t mapsCount) :
-  d_maps(mapsCount)
+  d_maps(mapsCount == 0 ? 1 : mapsCount)
  {
  }
  
@@ -781,6 +781,7 @@ uint64_t MemRecursorCache::doDump(int fd, size_t maxCacheEntries)
    for (auto& mc : d_maps) {
      auto map = mc.lock();
      const auto shardSize = map->d_map.size();
+    fprintf(fp.get(), "; record cache shard %zu; size %zu\n", shard, shardSize);
      min = std::min(min, shardSize);
      max = std::max(max, shardSize);
      shard++;
@@ -814,10 +815,7 @@ uint64_t MemRecursorCache::doDump(int fd, size_t maxCacheEntries)
  void MemRecursorCache::doPrune(size_t keep)
  {
    size_t cacheSize = size();
-  cerr << "=====-Cache=============" << endl;
    pruneMutexCollectionsVector<SequencedTag>(*this, d_maps, keep, cacheSize);
-  cerr << "Size is now " << size() << endl;
-  cerr << "========================" << endl;
  }
  
  namespace boost
diff --git a/pdns/recursordist/test-negcache_cc.cc b/pdns/recursordist/test-negcache_cc.cc

index 4bbce4ababc1c6023459eb2552ffc29b2945d8e8..3306df1da87cacc591b30d64d20a984d36db88df 100644 (file)
--- a/pdns/recursordist/test-negcache_cc.cc
+++ b/pdns/recursordist/test-negcache_cc.cc
@@ -282,6 +282,28 @@ BOOST_AUTO_TEST_CASE(test_prune)
    struct timeval now;
    Utility::gettimeofday(&now, 0);
  
+  NegCache cache(1);
+  NegCache::NegCacheEntry ne;
+  for (int i = 0; i < 400; i++) {
+    ne = genNegCacheEntry(DNSName(std::to_string(i) + qname), auth, now);
+    cache.add(ne);
+  }
+
+  BOOST_CHECK_EQUAL(cache.size(), 400U);
+
+  cache.prune(100);
+
+  BOOST_CHECK_EQUAL(cache.size(), 100U);
+}
+
+BOOST_AUTO_TEST_CASE(test_prune_many_shards)
+{
+  string qname(".powerdns.com");
+  DNSName auth("powerdns.com");
+
+  struct timeval now;
+  Utility::gettimeofday(&now, 0);
+
    NegCache cache;
    NegCache::NegCacheEntry ne;
    for (int i = 0; i < 400; i++) {
@@ -443,6 +465,7 @@ BOOST_AUTO_TEST_CASE(test_dumpToFile)
    vector<string> expected = {
      "; negcache dump follows\n",
      ";\n",
+    "; negcache shard 0; size 2\n",
      "www1.powerdns.com. 600 IN TYPE0 VIA powerdns.com. ; (Indeterminate) origttl=600 ss=0\n",
      "powerdns.com. 600 IN SOA ns1. hostmaster. 1 2 3 4 5 ; (Indeterminate)\n",
      "powerdns.com. 600 IN RRSIG SOA 5 3 600 20370101000000 20370101000000 24567 dummy. data ;\n",
@@ -453,8 +476,7 @@ BOOST_AUTO_TEST_CASE(test_dumpToFile)
      "powerdns.com. 600 IN RRSIG SOA 5 3 600 20370101000000 20370101000000 24567 dummy. data ;\n",
      "powerdns.com. 600 IN NSEC deadbeef. ; (Indeterminate)\n",
      "powerdns.com. 600 IN RRSIG NSEC 5 3 600 20370101000000 20370101000000 24567 dummy. data ;\n",
-    "; negcache size: 2/0 shards: 1 min/max shard size: 2/2\n"
-    };
+    "; negcache size: 2/0 shards: 1 min/max shard size: 2/2\n"};
  
    struct timeval now;
    Utility::gettimeofday(&now, 0);
author	Otto Moerbeek <otto.moerbeek@open-xchange.com>
	Tue, 3 Jan 2023 10:36:54 +0000 (11:36 +0100)
committer	Otto Moerbeek <otto.moerbeek@open-xchange.com>
	Wed, 4 Jan 2023 10:13:18 +0000 (11:13 +0100)
pdns/cachecleaner.hh		patch \| blob \| blame \| history
pdns/recursordist/negcache.cc		patch \| blob \| blame \| history
pdns/recursordist/recursor_cache.cc		patch \| blob \| blame \| history
pdns/recursordist/test-negcache_cc.cc		patch \| blob \| blame \| history