From 9346ae6538d08ffd496f314a6bfb14631da7ec37 Mon Sep 17 00:00:00 2001
From: Amos Jeffries
Date: Sat, 29 Aug 2015 13:21:33 -0700
Subject: [PATCH] Bug 3553: cache_swap_high ignored and maxCapacity used instead

Also, to make matters worse, the number of objects (max 70) being purged
on each of the 1-second maintenance loops was far too small for the
traffic speeds of up to 20k RPS now being processed by proxies.

This fixes the cache_swap_high behaviour to more closely match what is
currently documented, although some documentation does say it cleans all
the way down to the low-water mark. That appears never to have been true
within a single cycle, but it would happen over several cycles if the
proxy speed was not too high.

With this updated algorithm there is almost no limit to how far the
aggressiveness can scale, but it scales linearly at 300 objects per
multiple of the gap between the low- and high-water marks.

SwapDir::maintain is now fairly well documented and debug traces have
been added, with several TODO ideas for future improvement also
documented in the method code.
---
 src/cf.data.pre          | 44 +++++++++++++-------
 src/fs/ufs/UFSSwapDir.cc | 86 +++++++++++++++++++++++++++++++---------
 2 files changed, 97 insertions(+), 33 deletions(-)

diff --git a/src/cf.data.pre b/src/cf.data.pre
index f2762f79f4..c922aa3754 100644
--- a/src/cf.data.pre
+++ b/src/cf.data.pre
@@ -3888,18 +3888,26 @@ TYPE: int
 DEFAULT: 90
 LOC: Config.Swap.lowWaterMark
 DOC_START
-	The low-water mark for cache object replacement.
-	Replacement begins when the swap (disk) usage is above the
-	low-water mark and attempts to maintain utilization near the
-	low-water mark. As swap utilization gets close to high-water
-	mark object eviction becomes more aggressive. If utilization is
-	close to the low-water mark less replacement is done each time.
+	The low-water mark for AUFS/UFS/diskd cache object eviction by
+	the cache_replacement_policy algorithm.
+
+	Removal begins when the swap (disk) usage of a cache_dir is
+	above this low-water mark and attempts to maintain utilization
+	near the low-water mark.
+
+	As swap utilization increases towards the high-water mark set
+	by cache_swap_high, object eviction becomes more aggressive.
+
+	The difference in percentage points between the low- and high-water
+	marks represents an eviction rate of 300 objects per second, and
+	the rate continues to scale in aggressiveness by multiples of
+	this difference above the high-water mark.
 
 	Defaults are 90% and 95%. If you have a large cache, 5% could be
 	hundreds of MB. If this is the case you may wish to set these
 	numbers closer together.
 
-	See also cache_swap_high
+	See also cache_swap_high and cache_replacement_policy
 DOC_END
 
 NAME: cache_swap_high
@@ -3908,18 +3916,26 @@ TYPE: int
 DEFAULT: 95
 LOC: Config.Swap.highWaterMark
 DOC_START
-	The high-water mark for cache object replacement.
-	Replacement begins when the swap (disk) usage is above the
-	low-water mark and attempts to maintain utilization near the
-	low-water mark. As swap utilization gets close to high-water
-	mark object eviction becomes more aggressive. If utilization is
-	close to the low-water mark less replacement is done each time.
+	The high-water mark for AUFS/UFS/diskd cache object eviction by
+	the cache_replacement_policy algorithm.
+
+	Removal begins when the swap (disk) usage of a cache_dir is
+	above the low-water mark set by cache_swap_low and attempts to
+	maintain utilization near the low-water mark.
+
+	As swap utilization increases towards this high-water mark,
+	object eviction becomes more aggressive.
+
+	The difference in percentage points between the low- and high-water
+	marks represents an eviction rate of 300 objects per second, and
+	the rate continues to scale in aggressiveness by multiples of
+	this difference above the high-water mark.
 
 	Defaults are 90% and 95%. If you have a large cache, 5% could be
 	hundreds of MB. If this is the case you may wish to set these
 	numbers closer together.
 
-	See also cache_swap_low
+	See also cache_swap_low and cache_replacement_policy
 DOC_END
 
 COMMENT_START
diff --git a/src/fs/ufs/UFSSwapDir.cc b/src/fs/ufs/UFSSwapDir.cc
index 87f5f682a9..65a6535c58 100644
--- a/src/fs/ufs/UFSSwapDir.cc
+++ b/src/fs/ufs/UFSSwapDir.cc
@@ -412,44 +412,89 @@ Fs::Ufs::UFSSwapDir::statfs(StoreEntry & sentry) const
 void
 Fs::Ufs::UFSSwapDir::maintain()
 {
-    /* We can't delete objects while rebuilding swap */
+    /* TODO: possible options for improvement;
+     *
+     * Note that too much aggression here is not good. It means that the disk
+     * controller is getting a long queue of removals to act on, along
+     * with its regular I/O queue, and that client traffic is 'paused'
+     * and growing the network I/O queue as well while the scan happens.
+     * Possibly bad knock-on effects as Squid catches up on all that.
+     *
+     * Bug 2448 may have been a sign of what can go wrong. At the least it
+     * provides a test case for aggression effects in overflow conditions.
+     *
+     * - base removal limit on space saved, instead of count ?
+     *
+     * - base removal rate on a traffic speed counter ?
+     *   as the purge took up more time out of the second it would grow to
+     *   a graceful full pause
+     *
+     * - pass out a value to cause another event to be scheduled immediately
+     *   instead of waiting a whole second more ?
+     *   knock on; schedule less if all caches are under low-water
+     *
+     * - admin configurable removal rate or count ?
+     *   the current numbers are arbitrary, config helps with experimental
+     *   trials and future-proofing the install base.
+     *   we also have this indirectly by shifting the relative positions
+     *   of low-, high-water and the total capacity limit.
+     */
+    /* We can't delete objects while rebuilding swap */
 
     /* XXX FIXME each store should start maintaining as it comes online. */
-
-    if (StoreController::store_dirs_rebuilding)
+    if (StoreController::store_dirs_rebuilding) {
+        debugs(47, DBG_IMPORTANT, StoreController::store_dirs_rebuilding << " cache_dir still rebuilding. Skip GC for " << path);
         return;
+    }
 
-    StoreEntry *e = NULL;
-
-    int removed = 0;
-
-    RemovalPurgeWalker *walker;
+    // minSize() is swap_low_watermark in bytes
+    const uint64_t lowWaterSz = minSize();
 
-    double f = (double) (currentSize() - minSize()) / (maxSize() - minSize());
+    if (currentSize() < lowWaterSz) {
+        debugs(47, 2, "space still available in " << path);
+        return;
+    }
 
-    f = f < 0.0 ? 0.0 : f > 1.0 ? 1.0 : f;
+    // maxSize() is the cache_dir total size in bytes
+    const uint64_t highWaterSz = ((maxSize() * Config.Swap.highWaterMark) / 100);
+
+    // f is the fraction of the 'gap' filled between low- and high-water.
+    // Used to reduce the purge rate when between the water marks, and
+    // to multiply it more aggressively the further above high-water
+    // it reaches, but in a graceful linear growth curve.
+    double f = 1.0;
+    if (highWaterSz > lowWaterSz) {
+        // might be equal. n/0 is bad.
+        f = (double) (currentSize() - lowWaterSz) / (highWaterSz - lowWaterSz);
+    }
 
+    // how deep to look for a single object that can be removed
     int max_scan = (int) (f * 400.0 + 100.0);
 
-    int max_remove = (int) (f * 70.0 + 10.0);
+    // try to purge only this many objects this cycle.
+    int max_remove = (int) (f * 300.0 + 20.0);
 
     /*
      * This is kinda cheap, but so we need this priority hack?
      */
+    debugs(47, 3, "f=" << f << ", max_scan=" << max_scan << ", max_remove=" << max_remove);
 
-    debugs(47, 3, HERE << "f=" << f << ", max_scan=" << max_scan << ", max_remove=" << max_remove );
+    RemovalPurgeWalker *walker = repl->PurgeInit(repl, max_scan);
 
-    walker = repl->PurgeInit(repl, max_scan);
-
-    while (1) {
-        if (currentSize() < minSize())
-            break;
+    int removed = 0;
+    // only purge while above low-water
+    while (currentSize() >= lowWaterSz) {
+        // stop if we reached the max removals for this cycle.
+        // Bug 2448 may be from this not clearing enough,
+        // but it predates the current algorithm, so we are not sure.
 
         if (removed >= max_remove)
             break;
 
-        e = walker->Next(walker);
+        StoreEntry *e = walker->Next(walker);
 
+        // stop if all objects are locked / in-use,
+        // or the cache is empty
         if (!e)
             break;      /* no more objects */
 
@@ -459,9 +504,12 @@ Fs::Ufs::UFSSwapDir::maintain()
     }
 
     walker->Done(walker);
-    debugs(47, (removed ? 2 : 3), HERE << path <<
+    debugs(47, (removed ? 2 : 3), path <<
            " removed " << removed << "/" << max_remove << " f=" <<
            std::setprecision(4) << f << " max_scan=" << max_scan);
+
+    // what if the cache is still over the high watermark?
+    // Store::Maintain() schedules another purge in 1 second.
 }
 
 void
-- 
2.47.2
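
Appended note (not part of the patch): a small standalone sketch of the
purge-rate arithmetic the patch introduces, so the scaling is easy to see at a
glance. The 100 GB cache_dir size and the 90%/95% water marks below are
hypothetical example values; only the f / max_scan / max_remove formulas
mirror the new maintain() code.

    // purge_rate_example.cc -- illustrates how the purge rate scales with disk usage.
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint64_t maxSz = 100ULL << 30;              // hypothetical 100 GB cache_dir
        const int lowWaterMark = 90, highWaterMark = 95;  // cache_swap_low / cache_swap_high
        const uint64_t lowWaterSz = maxSz * lowWaterMark / 100;
        const uint64_t highWaterSz = maxSz * highWaterMark / 100;

        // sample utilization: below low-water, between the marks, at and above high-water
        const uint64_t samples[] = { maxSz * 85 / 100, maxSz * 92 / 100,
                                     maxSz * 95 / 100, maxSz * 99 / 100 };

        for (const uint64_t currentSz : samples) {
            if (currentSz < lowWaterSz) {
                printf("%3.0f%% used: below low-water, no purge\n", 100.0 * currentSz / maxSz);
                continue;
            }
            // f = 0 at low-water, 1 at high-water, and keeps growing linearly above it
            double f = 1.0;
            if (highWaterSz > lowWaterSz)
                f = double(currentSz - lowWaterSz) / double(highWaterSz - lowWaterSz);
            const int max_scan = int(f * 400.0 + 100.0);   // objects examined per 1-second cycle
            const int max_remove = int(f * 300.0 + 20.0);  // objects purged per 1-second cycle
            printf("%3.0f%% used: f=%.2f max_scan=%d max_remove=%d\n",
                   100.0 * currentSz / maxSz, f, max_scan, max_remove);
        }
        return 0;
    }

At 92% usage this gives f=0.4 (140 removals per cycle); at 95%, f=1.0 (320);
at 99%, f=1.8 (560), which is the "300 objects per multiple of the gap"
behaviour described in the commit message.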