Accumulate fewer unknown-size responses to avoid overwhelming disks.

author Alex Rousskov <rousskov@measurement-factory.com>

Sun, 1 May 2016 21:37:52 +0000 (15:37 -0600)

committer Alex Rousskov <rousskov@measurement-factory.com>

Sun, 1 May 2016 21:37:52 +0000 (15:37 -0600)
author Alex Rousskov <rousskov@measurement-factory.com>
Sun, 1 May 2016 21:37:52 +0000 (15:37 -0600)
committer Alex Rousskov <rousskov@measurement-factory.com>
Sun, 1 May 2016 21:37:52 +0000 (15:37 -0600)
diff --git a/src/MemStore.cc b/src/MemStore.cc

index c8db5ca52f5c7198d3ea11bcfab8348ac7610e29..ffece9beba85a6e0eb37e8a0984f16c84e93ebcd 100644 (file)
--- a/src/MemStore.cc
+++ b/src/MemStore.cc
@@ -625,16 +625,8 @@ MemStore::shouldCache(StoreEntry &e) const
      }
  
      const int64_t expectedSize = e.mem_obj->expectedReplySize(); // may be < 0
-
-    // objects of unknown size are not allowed into memory cache, for now
-    if (expectedSize < 0) {
-        debugs(20, 5, "Unknown expected size: " << e);
-        return false;
-    }
-
      const int64_t loadedSize = e.mem_obj->endOffset();
      const int64_t ramSize = max(loadedSize, expectedSize);
-
      if (ramSize > maxObjectSize()) {
          debugs(20, 5, HERE << "Too big max(" <<
                 loadedSize << ", " << expectedSize << "): " << e);
@@ -674,7 +666,10 @@ MemStore::startCaching(StoreEntry &e)
      e.mem_obj->memCache.index = index;
      e.mem_obj->memCache.io = MemObject::ioWriting;
      slot->set(e);
-    map->startAppending(index);
+    // Do not allow others to feed off an unknown-size entry because we will
+    // stop swapping it out if it grows too large.
+    if (e.mem_obj->expectedReplySize() >= 0)
+        map->startAppending(index);
      e.memOutDecision(true);
      return true;
  }
@@ -700,6 +695,9 @@ MemStore::copyToShm(StoreEntry &e)
          return; // nothing to do (yet)
      }
  
+    // throw if an accepted unknown-size entry grew too big or max-size changed
+    Must(eSize <= maxObjectSize());
+
      const int32_t index = e.mem_obj->memCache.index;
      assert(index >= 0);
      Ipc::StoreMapAnchor &anchor = map->writeableEntry(index);
diff --git a/src/fs/rock/RockIoState.cc b/src/fs/rock/RockIoState.cc

index e43ea441a5e571402b6682fd0af901d74ed46be5..983adc45c07a001c1b65451f85df1f17c9201499 100644 (file)
--- a/src/fs/rock/RockIoState.cc
+++ b/src/fs/rock/RockIoState.cc
@@ -184,6 +184,9 @@ Rock::IoState::tryWrite(char const *buf, size_t size, off_t coreOff)
      // either this is the first write or append; we do not support write gaps
      assert(!coreOff || coreOff == -1);
  
+    // throw if an accepted unknown-size entry grew too big or max-size changed
+    Must(offset_ + size <= static_cast<uint64_t>(dir->maxObjectSize()));
+
      // allocate the first slice during the first write
      if (!coreOff) {
          assert(sidCurrent < 0);
diff --git a/src/fs/rock/RockSwapDir.cc b/src/fs/rock/RockSwapDir.cc

index 34d26f12004222b858cac8b0904da07ee7b812de..cbcc70b392b7a26a76d33bcd3e7486784746481f 100644 (file)
--- a/src/fs/rock/RockSwapDir.cc
+++ b/src/fs/rock/RockSwapDir.cc
@@ -604,7 +604,9 @@ Rock::SwapDir::rebuild()
  bool
  Rock::SwapDir::canStore(const StoreEntry &e, int64_t diskSpaceNeeded, int &load) const
  {
-    if (!::SwapDir::canStore(e, sizeof(DbCellHeader)+diskSpaceNeeded, load))
+    if (diskSpaceNeeded >= 0)
+        diskSpaceNeeded += sizeof(DbCellHeader);
+    if (!::SwapDir::canStore(e, diskSpaceNeeded, load))
          return false;
  
      if (!theFile || !theFile->canWrite())
diff --git a/src/fs/ufs/UFSStoreState.cc b/src/fs/ufs/UFSStoreState.cc

index 678df172e2319687158eb8eded54ef77755006dd..e4527a0c09683c9b5b66f604b710cc4c2efa1e3b 100644 (file)
--- a/src/fs/ufs/UFSStoreState.cc
+++ b/src/fs/ufs/UFSStoreState.cc
@@ -14,6 +14,7 @@
  #include "DiskIO/ReadRequest.h"
  #include "DiskIO/WriteRequest.h"
  #include "Generic.h"
+#include "SquidConfig.h"
  #include "SquidList.h"
  #include "Store.h"
  #include "store/Disk.h"
@@ -167,6 +168,15 @@ Fs::Ufs::UFSStoreState::write(char const *buf, size_t size, off_t aOffset, FREE
          return false;
      }
  
+    const Store::Disk &dir = *INDEXSD(swap_dirn);
+    if (offset_ + size > static_cast<uint64_t>(dir.maxObjectSize())) {
+        debugs(79, 2, "accepted unknown-size entry grew too big: " <<
+               (offset_ + size) << " > " << dir.maxObjectSize());
+        free_func((void*)buf);
+        tryClosing();
+        return false;
+    }
+
      queueWrite(buf, size, aOffset, free_func);
      drainWriteQueue();
      return true;
diff --git a/src/store.cc b/src/store.cc

index 3f91d468ef256c1810ea67bbf0f14f8ccc991475..fbd8a7f2a0fe09380d1791dc8892c9ff8b7d96a8 100644 (file)
--- a/src/store.cc
+++ b/src/store.cc
@@ -1370,40 +1370,10 @@ storeInit(void)
      storeRegisterWithCacheManager();
  }
  
-/// computes maximum size of a cachable object
-/// larger objects are rejected by all (disk and memory) cache stores
-static int64_t
-storeCalcMaxObjSize()
-{
-    int64_t ms = 0; // nothing can be cached without at least one store consent
-
-    // global maximum is at least the disk store maximum
-    for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
-        assert (Config.cacheSwap.swapDirs[i].getRaw());
-        const int64_t storeMax = dynamic_cast<SwapDir *>(Config.cacheSwap.swapDirs[i].getRaw())->maxObjectSize();
-        if (ms < storeMax)
-            ms = storeMax;
-    }
-
-    // global maximum is at least the memory store maximum
-    // TODO: move this into a memory cache class when we have one
-    const int64_t memMax = static_cast<int64_t>(min(Config.Store.maxInMemObjSize, Config.memMaxSize));
-    if (ms < memMax)
-        ms = memMax;
-
-    return ms;
-}
-
  void
  storeConfigure(void)
  {
-    store_swap_high = (long) (((float) Store::Root().maxSize() *
-                               (float) Config.Swap.highWaterMark) / (float) 100);
-    store_swap_low = (long) (((float) Store::Root().maxSize() *
-                              (float) Config.Swap.lowWaterMark) / (float) 100);
-    store_pages_max = Config.memMaxSize / sizeof(mem_node);
-
-    store_maxobjsize = storeCalcMaxObjSize();
+    Store::Root().updateLimits();
  }
  
  bool
diff --git a/src/store/Controller.cc b/src/store/Controller.cc

index 9af6e609a4928fc86a0a6d9a84c3183039ef850c..352370c6e65680e0bb339bf8135b181649ea95fd 100644 (file)
--- a/src/store/Controller.cc
+++ b/src/store/Controller.cc
@@ -183,6 +183,23 @@ Store::Controller::maxObjectSize() const
      return swapDir->maxObjectSize();
  }
  
+/// Recomputes and caches store-wide limits: lets the disk stores refresh
+/// their own limits first, then derives the swap watermarks, the number of
+/// mem_node-sized pages that fit into Config.memMaxSize, and the largest
+/// object size acceptable to either the disk stores or the memory cache.
+void
+Store::Controller::updateLimits()
+{
+    // swapDir is treated as optional further below; do not dereference it
+    // here without the same check
+    if (swapDir)
+        swapDir->updateLimits();
+
+    // watermarks are configured as percentages of the total store size
+    store_swap_high = static_cast<long>((static_cast<float>(maxSize()) *
+                                         static_cast<float>(Config.Swap.highWaterMark)) / 100.0f);
+    store_swap_low = static_cast<long>((static_cast<float>(maxSize()) *
+                                        static_cast<float>(Config.Swap.lowWaterMark)) / 100.0f);
+    store_pages_max = Config.memMaxSize / sizeof(mem_node);
+
+    // TODO: move this into a memory cache class when we have one
+    const int64_t memMax = static_cast<int64_t>(min(Config.Store.maxInMemObjSize, Config.memMaxSize));
+    const int64_t disksMax = swapDir ? swapDir->maxObjectSize() : 0;
+    // an object is cachable if at least one store (disk or memory) may take it
+    store_maxobjsize = std::max(disksMax, memMax);
+}
+
  StoreSearch *
  Store::Controller::search()
  {
@@ -326,6 +343,13 @@ Store::Controller::find(const cache_key *key)
      return nullptr;
  }
  
+/// Reports how many more bytes of the given entry the disk stores want to
+/// see accumulated; zero when there are no disk stores to ask.
+int64_t
+Store::Controller::accumulateMore(StoreEntry &entry) const
+{
+    // Only disks are consulted: the memory cache should not influence
+    // for-swapout accumulation decision.
+    if (!swapDir)
+        return 0;
+
+    return swapDir->accumulateMore(entry);
+}
+
  void
  Store::Controller::markForUnlink(StoreEntry &e)
  {
diff --git a/src/store/Controller.h b/src/store/Controller.h

index 65eb26ea8a41069baca510a5db95520f374bf4aa..71e4eebf889919a12f500d8c18cde2555768315c 100644 (file)
--- a/src/store/Controller.h
+++ b/src/store/Controller.h
@@ -42,6 +42,13 @@ public:
      virtual void unlink(StoreEntry &) override;
      virtual int callback() override;
  
+    /// Additional unknown-size entry bytes required by Store in order to
+    /// reduce the risk of selecting the wrong disk cache for the growing entry.
+    int64_t accumulateMore(StoreEntry &) const;
+
+    /// slowly calculate (and cache) hi/lo watermarks and similar limits
+    void updateLimits();
+
      /// called when the entry is no longer needed by any transaction
      void handleIdleEntry(StoreEntry &);
  
diff --git a/src/store/Disk.cc b/src/store/Disk.cc

index 48d323dcab62e839ccaf921f0ce965d17171985e..d7e3edb129fa1cd6ed12ab8716cd216c6b6649e8 100644 (file)
--- a/src/store/Disk.cc
+++ b/src/store/Disk.cc
@@ -22,7 +22,7 @@
  #include "tools.h"
  
  Store::Disk::Disk(char const *aType): theType(aType),
-    max_size(0), min_objsize(0), max_objsize (-1),
+    max_size(0), min_objsize(-1), max_objsize (-1),
      path(NULL), index(-1), disker(-1),
      repl(NULL), removals(0), scanned(0),
      cleanLog(NULL)
@@ -92,6 +92,13 @@ Store::Disk::minSize() const
      return ((maxSize() * Config.Swap.lowWaterMark) / 100);
  }
  
+/// The smallest object size this cache_dir accepts: its own min-size=N
+/// value when one is set, Config.Store.minObjectSize otherwise.
+int64_t
+Store::Disk::minObjectSize() const
+{
+    // a non-negative per-store min-size=N value is authoritative
+    if (min_objsize >= 0)
+        return min_objsize;
+
+    // min_objsize is negative: fall back to the global setting
+    return Config.Store.minObjectSize;
+}
+
  int64_t
  Store::Disk::maxObjectSize() const
  {
@@ -148,19 +155,9 @@ Store::Disk::diskFull()
  bool
  Store::Disk::objectSizeIsAcceptable(int64_t objsize) const
  {
-    // without limits, all object sizes are acceptable, including unknown ones
-    if (min_objsize <= 0 && max_objsize == -1)
-        return true;
-
-    // with limits, objects with unknown sizes are not acceptable
-    if (objsize == -1)
-        return false;
-
-    // without the upper limit, just check the lower limit
-    if (max_objsize == -1)
-        return  min_objsize <= objsize;
-
-    return min_objsize <= objsize && objsize < max_objsize;
+    // need either the expected or the already accumulated object size
+    assert(objsize >= 0);
+    return minObjectSize() <= objsize && objsize <= maxObjectSize();
  }
  
  bool
@@ -380,7 +377,7 @@ Store::Disk::optionObjectSizeParse(char const *option, const char *value, int is
  void
  Store::Disk::optionObjectSizeDump(StoreEntry * e) const
  {
-    if (min_objsize != 0)
+    if (min_objsize != -1)
          storeAppendPrintf(e, " min-size=%" PRId64, min_objsize);
  
      if (max_objsize != -1)
diff --git a/src/store/Disk.h b/src/store/Disk.h

index 054be9523f093fd61811220e0d6f3f3e86aba0c3..4b47b49048a98d609b4063ebe7a99f705970385e 100644 (file)
--- a/src/store/Disk.h
+++ b/src/store/Disk.h
@@ -54,6 +54,9 @@ public:
      virtual bool dereference(StoreEntry &e) override;
      virtual void maintain() override;
  
+    /// the size of the smallest entry this cache_dir can store
+    int64_t minObjectSize() const;
+
      /// configure the maximum object size for this storage area.
      /// May be any size up to the total storage area.
      void maxObjectSize(int64_t newMax);
diff --git a/src/store/Disks.cc b/src/store/Disks.cc

index f1bea0caef10f3f29396877cc35140016d89497c..53377e43f74817fc6ddf485926f26024d749983d 100644 (file)
--- a/src/store/Disks.cc
+++ b/src/store/Disks.cc
@@ -27,6 +27,24 @@ static STDIRSELECT storeDirSelectSwapDirLeastLoad;
   */
  STDIRSELECT *storeDirSelectSwapDir = storeDirSelectSwapDirLeastLoad;
  
+/// Picks the entry size that Disk::canStore() size limit checks should use.
+/// Computing it once per selection spares every cache_dir a similar
+/// calculation.
+static int64_t
+objectSizeForDirSelection(const StoreEntry &entry)
+{
+    // entry.objectLen() is negative here when we are still STORE_PENDING,
+    // so start with the expected reply size instead
+    const int64_t expected = entry.mem_obj->expectedReplySize();
+
+    // A negative expected size means "unknown": estimate it with the bytes
+    // accumulated so far. Controller::accumulateMore() guarantees that there
+    // are enough of them.
+    const int64_t bodySize = (expected < 0) ? entry.mem_obj->endOffset() : expected;
+    assert(bodySize >= 0);
+
+    // the swap header is stored together with the body
+    return bodySize + entry.mem_obj->swap_hdr_sz;
+}
+
  /**
   * This new selection scheme simply does round-robin on all SwapDirs.
   * A SwapDir is skipped if it is over the max_size (100%) limit, or
@@ -35,10 +53,7 @@ STDIRSELECT *storeDirSelectSwapDir = storeDirSelectSwapDirLeastLoad;
  static int
  storeDirSelectSwapDirRoundRobin(const StoreEntry * e)
  {
-    // e->objectLen() is negative at this point when we are still STORE_PENDING
-    ssize_t objsize = e->mem_obj->expectedReplySize();
-    if (objsize != -1)
-        objsize += e->mem_obj->swap_hdr_sz;
+    const int64_t objsize = objectSizeForDirSelection(*e);
  
      // Increment the first candidate once per selection (not once per
      // iteration) to reduce bias when some disk(s) attract more entries.
@@ -81,18 +96,14 @@ static int
  storeDirSelectSwapDirLeastLoad(const StoreEntry * e)
  {
      int64_t most_free = 0;
-    ssize_t least_objsize = -1;
+    int64_t best_objsize = -1;
      int least_load = INT_MAX;
      int load;
      int dirn = -1;
      int i;
      RefCount<SwapDir> SD;
  
-    // e->objectLen() is negative at this point when we are still STORE_PENDING
-    ssize_t objsize = e->mem_obj->expectedReplySize();
-
-    if (objsize != -1)
-        objsize += e->mem_obj->swap_hdr_sz;
+    const int64_t objsize = objectSizeForDirSelection(*e);
  
      for (i = 0; i < Config.cacheSwap.n_configured; ++i) {
          SD = dynamic_cast<SwapDir *>(INDEXSD(i));
@@ -111,11 +122,14 @@ storeDirSelectSwapDirLeastLoad(const StoreEntry * e)
  
          /* If the load is equal, then look in more details */
          if (load == least_load) {
-            /* closest max-size fit */
-
-            if (least_objsize != -1)
-                if (SD->maxObjectSize() > least_objsize)
+            /* best max-size fit */
+            if (best_objsize != -1) {
+                // cache_dir with the smallest max-size gets the known-size object
+                // cache_dir with the largest max-size gets the unknown-size object
+                if ((objsize != -1 && SD->maxObjectSize() > best_objsize) ||
+                    (objsize == -1 && SD->maxObjectSize() < best_objsize))
                      continue;
+            }
  
              /* most free */
              if (cur_free < most_free)
@@ -123,7 +137,7 @@ storeDirSelectSwapDirLeastLoad(const StoreEntry * e)
          }
  
          least_load = load;
-        least_objsize = SD->maxObjectSize();
+        best_objsize = SD->maxObjectSize();
          most_free = cur_free;
          dirn = i;
      }
@@ -134,6 +148,13 @@ storeDirSelectSwapDirLeastLoad(const StoreEntry * e)
      return dirn;
  }
  
+/// Starts with every cached object size limit at -1;
+/// updateLimits() resets them to -1 again before recomputing.
+Store::Disks::Disks()
+    : largestMinimumObjectSize{-1}
+    , largestMaximumObjectSize{-1}
+    , secondLargestMaximumObjectSize{-1}
+{
+}
+
  SwapDir *
  Store::Disks::store(int const x) const
  {
@@ -330,14 +351,68 @@ Store::Disks::currentCount() const
  int64_t
  Store::Disks::maxObjectSize() const
  {
-    int64_t result = -1;
+    return largestMaximumObjectSize;
+}
+
+/// Recomputes the cached object size limits across all active cache_dirs:
+/// the largest min-size, the largest max-size, and the second-largest
+/// (strictly smaller than the largest) max-size. Each value is -1 when no
+/// active cache_dir provides it.
+void
+Store::Disks::updateLimits()
+{
+    largestMinimumObjectSize = -1;
+    largestMaximumObjectSize = -1;
+    secondLargestMaximumObjectSize = -1;
  
      for (int i = 0; i < Config.cacheSwap.n_configured; ++i) {
-        if (dir(i).active() && store(i)->maxObjectSize() > result)
-            result = store(i)->maxObjectSize();
+        const auto &disk = dir(i);
+        if (!disk.active())
+            continue;
+
+        if (disk.minObjectSize() > largestMinimumObjectSize)
+            largestMinimumObjectSize = disk.minObjectSize();
+
+        const auto diskMaxObjectSize = disk.maxObjectSize();
+        if (diskMaxObjectSize > largestMaximumObjectSize) {
+            // the previous largest (or -1 if there was none) becomes the runner-up
+            secondLargestMaximumObjectSize = largestMaximumObjectSize;
+            largestMaximumObjectSize = diskMaxObjectSize;
+        } else if (diskMaxObjectSize < largestMaximumObjectSize &&
+                   diskMaxObjectSize > secondLargestMaximumObjectSize) {
+            // A cache_dir configured after the largest one may still be the
+            // runner-up; without this branch the result would depend on the
+            // cache_dir order. Disks sharing the largest limit are skipped:
+            // the runner-up must be strictly smaller than the largest.
+            secondLargestMaximumObjectSize = diskMaxObjectSize;
+        }
      }
+}
  
-    return result;
+/// Tells the caller how many more bytes of an unknown-size entry should be
+/// accumulated before a cache_dir is selected for it; zero means that
+/// waiting longer is not requested.
+int64_t
+Store::Disks::accumulateMore(const StoreEntry &entry) const
+{
+    const auto buffered = entry.mem_obj->availableForSwapOut();
+
+    /*
+     * Accumulation continues until the set of disks eligible to accept the
+     * entry becomes stable, i.e. until more bytes cannot affect the
+     * cache_dir selection. That usually happens immediately (or soon)
+     * because most configurations either do not use cache_dirs with explicit
+     * min-size/max-size limits or use the same max-size limit for all
+     * cache_dirs (and low min-size limits).
+     */
+    int64_t more = 0;
+
+    if (buffered < largestMinimumObjectSize) {
+        // growing further may satisfy additional min-size cache_dirs
+        more = largestMinimumObjectSize - buffered;
+    } else if (buffered <= secondLargestMaximumObjectSize) {
+        // growing further may rule out some max-size cache_dirs (other than
+        // by exceeding the largest maximum, which is deliberately ignored)
+        more = secondLargestMaximumObjectSize - buffered + 1;
+    } else {
+        /*
+         * Waiting for largestMaximumObjectSize instead would eliminate the
+         * risk of starting to swap out an entry that later grows too big,
+         * but it also implies huge accumulation in most environments. Huge
+         * accumulated entries not only consume lots of RAM but also create a
+         * burst of doPages() write requests that overwhelm the disk. We take
+         * the risk and allow swap out now; the disk will quit swapping out
+         * if the entry eventually grows too big for its selected cache_dir.
+         */
+        debugs(20, 3, "no: " << buffered << '>' <<
+               secondLargestMaximumObjectSize << ',' << largestMinimumObjectSize);
+    }
+
+    return more;
  }
  
  void
diff --git a/src/store/Disks.h b/src/store/Disks.h

index 1203e1c815f4f1b61a3eaa3d81a66a5fac45a940..3b72034f9f28aa2c2287a28658b70405ab7deeb1 100644 (file)
--- a/src/store/Disks.h
+++ b/src/store/Disks.h
@@ -18,6 +18,8 @@ namespace Store {
  class Disks: public Controlled
  {
  public:
+    Disks();
+
      /* Storage API */
      virtual void create() override;
      virtual void init() override;
@@ -40,10 +42,21 @@ public:
      virtual void unlink(StoreEntry &) override;
      virtual int callback() override;
  
+    /// slowly calculate (and cache) the object size limits of active cache_dirs
+    void updateLimits();
+
+    /// Additional unknown-size entry bytes required by disks in order to 
+    /// reduce the risk of selecting the wrong disk cache for the growing entry.
+    int64_t accumulateMore(const StoreEntry&) const;
+
  private:
      /* migration logic */
      SwapDir *store(int const x) const;
      SwapDir &dir(int const idx) const;
+
+    int64_t largestMinimumObjectSize; ///< maximum of all Disk::minObjectSize()s
+    int64_t largestMaximumObjectSize; ///< maximum of all Disk::maxObjectSize()s
+    int64_t secondLargestMaximumObjectSize; ///< the second-biggest Disk::maxObjectSize()
  };
  
  } // namespace Store
diff --git a/src/store_swapout.cc b/src/store_swapout.cc

index d8ff59d6c236489a37b5ab63f7f29fa18602fd79..ac18bdacfb54d575982351ee5d214b91e7711646 100644 (file)
--- a/src/store_swapout.cc
+++ b/src/store_swapout.cc
@@ -427,20 +427,11 @@ StoreEntry::mayStartSwapOut()
  
          // prevent final default swPossible answer for yet unknown length
          if (expectedEnd < 0 && store_status != STORE_OK) {
-            const int64_t maxKnownSize = mem_obj->availableForSwapOut();
-            debugs(20, 7, HERE << "maxKnownSize= " << maxKnownSize);
-            /*
-             * NOTE: the store_maxobjsize here is the global maximum
-             * size of object cacheable in any of Squid cache stores
-             * both disk and memory stores.
-             *
-             * However, I am worried that this
-             * deferance may consume a lot of memory in some cases.
-             * Should we add an option to limit this memory consumption?
-             */
-            debugs(20, 5,  HERE << "Deferring swapout start for " <<
-                   (store_maxobjsize - maxKnownSize) << " bytes");
-            return true; // may still fit, but no final decision yet
+            const int64_t more = Store::Root().accumulateMore(*this);
+            if (more > 0) {
+                debugs(20, 5, "got " << currentEnd << "; defer decision for " << more << " more bytes");
+                return true; // may still fit, but no final decision yet
+            }
          }
      }
  
diff --git a/src/tests/stub_SwapDir.cc b/src/tests/stub_SwapDir.cc

index 9e0d5a961dd59d9c19390a327d5b54aae7bf830f..773fbf0222576f1155af58ffc9e130be08cfa46c 100644 (file)
--- a/src/tests/stub_SwapDir.cc
+++ b/src/tests/stub_SwapDir.cc
@@ -22,6 +22,7 @@ void SwapDir::stat(StoreEntry &) const STUB
  void SwapDir::statfs(StoreEntry &)const STUB
  void SwapDir::maintain() STUB
  uint64_t SwapDir::minSize() const STUB_RETVAL(0)
+int64_t SwapDir::minObjectSize() const STUB_RETVAL(0)
  int64_t SwapDir::maxObjectSize() const STUB_RETVAL(0)
  void SwapDir::maxObjectSize(int64_t) STUB
  void SwapDir::reference(StoreEntry &) STUB
author	Alex Rousskov <rousskov@measurement-factory.com>
	Sun, 1 May 2016 21:37:52 +0000 (15:37 -0600)
committer	Alex Rousskov <rousskov@measurement-factory.com>
	Sun, 1 May 2016 21:37:52 +0000 (15:37 -0600)
src/MemStore.cc		patch \| blob \| blame \| history
src/fs/rock/RockIoState.cc		patch \| blob \| blame \| history
src/fs/rock/RockSwapDir.cc		patch \| blob \| blame \| history
src/fs/ufs/UFSStoreState.cc		patch \| blob \| blame \| history
src/store.cc		patch \| blob \| blame \| history
src/store/Controller.cc		patch \| blob \| blame \| history
src/store/Controller.h		patch \| blob \| blame \| history
src/store/Disk.cc		patch \| blob \| blame \| history
src/store/Disk.h		patch \| blob \| blame \| history
src/store/Disks.cc		patch \| blob \| blame \| history
src/store/Disks.h		patch \| blob \| blame \| history
src/store_swapout.cc		patch \| blob \| blame \| history
src/tests/stub_SwapDir.cc		patch \| blob \| blame \| history