Bug 3553: cache_swap_high ignored and maxCapacity used instead

author Amos Jeffries <squid3@treenet.co.nz>

Wed, 26 Aug 2015 17:51:18 +0000 (10:51 -0700)

committer Amos Jeffries <squid3@treenet.co.nz>

Wed, 26 Aug 2015 17:51:18 +0000 (10:51 -0700)
author Amos Jeffries <squid3@treenet.co.nz>
Wed, 26 Aug 2015 17:51:18 +0000 (10:51 -0700)
committer Amos Jeffries <squid3@treenet.co.nz>
Wed, 26 Aug 2015 17:51:18 +0000 (10:51 -0700)
diff --git a/src/cf.data.pre b/src/cf.data.pre

index d242e4ea900db963291223c6a26276b517d55e63..0c9b0ce791f8d4c5b2d1738f773c01cae974dea7 100644 (file)
--- a/src/cf.data.pre
+++ b/src/cf.data.pre
@@ -4004,18 +4004,26 @@ TYPE: int
  DEFAULT: 90
  LOC: Config.Swap.lowWaterMark
  DOC_START
-       The low-water mark for cache object replacement.
-       Replacement begins when the swap (disk) usage is above the
-       low-water mark and attempts to maintain utilization near the
-       low-water mark.  As swap utilization gets close to high-water
-       mark object eviction becomes more aggressive.  If utilization is
-       close to the low-water mark less replacement is done each time.
+       The low-water mark for AUFS/UFS/diskd cache object eviction by
+       the cache_replacement_policy algorithm.
+
+       Removal begins when the swap (disk) usage of a cache_dir is
+       above this low-water mark and attempts to maintain utilization
+       near the low-water mark.
+
+       As swap utilization increases towards the high-water mark set
+       by cache_swap_high, object eviction becomes more aggressive.
+
+       The difference in percentage points between the low- and
+       high-water marks represents an eviction rate of 200 objects per
+       second, and the rate continues to scale in aggressiveness by
+       multiples of this above the high-water mark.
  
         Defaults are 90% and 95%. If you have a large cache, 5% could be
         hundreds of MB. If this is the case you may wish to set these
         numbers closer together.
  
-       See also cache_swap_high
+       See also cache_swap_high and cache_replacement_policy
  DOC_END
  
  NAME: cache_swap_high
@@ -4024,18 +4032,26 @@ TYPE: int
  DEFAULT: 95
  LOC: Config.Swap.highWaterMark
  DOC_START
-       The high-water mark for cache object replacement.
-       Replacement begins when the swap (disk) usage is above the
-       low-water mark and attempts to maintain utilization near the
-       low-water mark.  As swap utilization gets close to high-water
-       mark object eviction becomes more aggressive.  If utilization is
-       close to the low-water mark less replacement is done each time.
+       The high-water mark for AUFS/UFS/diskd cache object eviction by
+       the cache_replacement_policy algorithm.
+
+       Removal begins when the swap (disk) usage of a cache_dir is
+       above the low-water mark set by cache_swap_low and attempts to
+       maintain utilization near the low-water mark.
+
+       As swap utilization increases towards this high-water mark, object
+       eviction becomes more aggressive.
+
+       The difference in percentage points between the low- and
+       high-water marks represents an eviction rate of 200 objects per
+       second, and the rate continues to scale in aggressiveness by
+       multiples of this above the high-water mark.
  
         Defaults are 90% and 95%. If you have a large cache, 5% could be
         hundreds of MB. If this is the case you may wish to set these
         numbers closer together.
  
-       See also cache_swap_low
+       See also cache_swap_low and cache_replacement_policy
  DOC_END
  
  COMMENT_START
diff --git a/src/fs/ufs/UFSSwapDir.cc b/src/fs/ufs/UFSSwapDir.cc

index eb03a0531fa1a5725e1226fe768ec11f85419680..552bb6e2862580dcae62a8ed732391564442a032 100644 (file)
--- a/src/fs/ufs/UFSSwapDir.cc
+++ b/src/fs/ufs/UFSSwapDir.cc
@@ -413,44 +413,89 @@ Fs::Ufs::UFSSwapDir::statfs(StoreEntry & sentry) const
  void
  Fs::Ufs::UFSSwapDir::maintain()
  {
-    /* We can't delete objects while rebuilding swap */
+    /* TODO: possible options for improvement;
+     *
+     * Note that too much aggression here is not good. It means that disk
+     * controller is getting a long queue of removals to act on, along
+     * with its regular I/O queue, and that client traffic is 'paused'
+     * and growing the network I/O queue as well while the scan happens.
+     * Possibly bad knock-on effects as Squid catches up on all that.
+     *
+     * Bug 2448 may have been a sign of what can go wrong. At the least it
+     * provides a test case for aggression effects in overflow conditions.
+     *
+     * - base removal limit on space saved, instead of count ?
+     *
+     * - base removal rate on a traffic speed counter ?
+     *   as the purge took up more time out of the second it would grow to
+     *   a graceful full pause
+     *
+     * - pass out a value to cause another event to be scheduled immediately
+     *   instead of waiting a whole second more ?
+     *   knock on; schedule less if all caches are under low-water
+     *
+     * - admin configurable removal rate or count ?
+     *   the current numbers are arbitrary, config helps with experimental
+     *   trials and future-proofing the install base.
+     *   we also have this indirectly by shifting the relative positions
+     *   of low-, high- water and the total capacity limit.
+     */
  
+    /* We can't delete objects while rebuilding swap */
      /* XXX FIXME each store should start maintaining as it comes online. */
-
-    if (StoreController::store_dirs_rebuilding)
+    if (StoreController::store_dirs_rebuilding) {
+        debugs(47, DBG_IMPORTANT, StoreController::store_dirs_rebuilding << " cache_dir still rebuilding. Skip GC for " << path);
          return;
+    }
  
-    StoreEntry *e = NULL;
-
-    int removed = 0;
-
-    RemovalPurgeWalker *walker;
+    // minSize() is swap_low_watermark in bytes
+    const uint64_t lowWaterSz = minSize();
  
-    double f = (double) (currentSize() - minSize()) / (maxSize() - minSize());
+    if (currentSize() < lowWaterSz) {
+        debugs(47, 2, "space still available in " << path);
+        return;
+    }
  
-    f = f < 0.0 ? 0.0 : f > 1.0 ? 1.0 : f;
+    // maxSize() is cache_dir total size in bytes
+    const uint64_t highWaterSz = ((maxSize() * Config.Swap.highWaterMark) / 100);
+
+    // f is the fraction of the 'gap' filled between low- and high-water.
+    // Used to reduce the purge rate when between the water marks, and
+    // to multiply it more aggressively the further above high-water
+    // it reaches, but along a graceful linear growth curve.
+    double f = 1.0;
+    if (highWaterSz > lowWaterSz) {
+        // might be equal. n/0 is bad.
+        f = (double) (currentSize() - lowWaterSz) / (highWaterSz - lowWaterSz);
+    }
  
+    // how deep to look for a single object that can be removed
      int max_scan = (int) (f * 400.0 + 100.0);
  
-    int max_remove = (int) (f * 70.0 + 10.0);
+    // try to purge only this many objects this cycle.
+    int max_remove = (int) (f * 200.0 + 20.0);
  
      /*
       * This is kinda cheap, but so we need this priority hack?
       */
+    debugs(47, 3, "f=" << f << ", max_scan=" << max_scan << ", max_remove=" << max_remove);
  
-    debugs(47, 3, HERE << "f=" << f << ", max_scan=" << max_scan << ", max_remove=" << max_remove  );
+    RemovalPurgeWalker *walker = repl->PurgeInit(repl, max_scan);
  
-    walker = repl->PurgeInit(repl, max_scan);
-
-    while (1) {
-        if (currentSize() < minSize())
-            break;
+    int removed = 0;
+    // only purge while above low-water
+    while (currentSize() >= lowWaterSz) {
  
+        // stop if we reached max removals for this cycle,
+        // Bug 2448 may be from this not clearing enough,
+        // but it predates the current algorithm so not sure
          if (removed >= max_remove)
              break;
  
-        e = walker->Next(walker);
+        StoreEntry *e = walker->Next(walker);
  
+        // stop if all objects are locked / in-use,
+        // or the cache is empty
          if (!e)
              break;      /* no more objects */
  
@@ -460,9 +505,12 @@ Fs::Ufs::UFSSwapDir::maintain()
      }
  
      walker->Done(walker);
-    debugs(47, (removed ? 2 : 3), HERE << path <<
+    debugs(47, (removed ? 2 : 3), path <<
             " removed " << removed << "/" << max_remove << " f=" <<
             std::setprecision(4) << f << " max_scan=" << max_scan);
+
+    // what if cache is still over the high watermark ?
+    // Store::Maintain() schedules another purge in 1 second.
  }
  
  void
author	Amos Jeffries <squid3@treenet.co.nz>
	Wed, 26 Aug 2015 17:51:18 +0000 (10:51 -0700)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Wed, 26 Aug 2015 17:51:18 +0000 (10:51 -0700)
src/cf.data.pre		patch \| blob \| blame \| history
src/fs/ufs/UFSSwapDir.cc		patch \| blob \| blame \| history