From 5bd8ce13baf09d2d4c3db88f59d21996bb5f7e09 Mon Sep 17 00:00:00 2001 From: Alex Rousskov Date: Fri, 3 Jul 2020 21:06:17 +0000 Subject: [PATCH] Concurrent hit misses due to Transients entry creation collision (#686) Transients::addEntry() fails when two simultaneous cache hit requests arrive at two different workers: Controller.cc find: e:=msV/0x55c2dbdac7f0*0 check failed: slot exception location: Transients.cc(236) addEntry The failure leads to a cache miss for one of the requests. The miss transaction will overwrite the already cached entry, possibly creating more misses due to long write locks used by the disk cache. The problem does not affect collapsed requests. It affects regular cache hits. The failure happens because two requests are trying to create a Transients entry at about the same time. Only one can succeed. With the current ReadWriteLock-based design, some contention is unavoidable, but the contention window was rather large/long: * The critical section essentially started when Controller::find() (in each request worker) checked Transients and did not find a matching entry there. That failed lookup put each request into "create a new Transients entry" mode. * A worker exited that critical section when openForWriting() in Transients::addEntry() returned (with a write lock to the ready-to-be-filled entry for one of the two requests and with a failure for the other). This change reduces the contention window to the bare minimum necessary to create and fill a Transients entry inside openOrCreateForReading(). In the micro-tests that exposed this problem, the probability of a failure is reduced from more than 70% to less than 3%. YMMV. Also split addEntry() into two directional methods because while they are structured similarly they actually have almost no common code now. --- src/Transients.cc | 79 +++++++++++++++++++++++++++++++-------------- src/Transients.h | 3 ++ src/ipc/StoreMap.cc | 37 +++++++++++++++++++++ src/ipc/StoreMap.h | 3 ++ 4 files changed, 98 insertions(+), 24 deletions(-) diff --git a/src/Transients.cc b/src/Transients.cc index 235cfb7511..1ba427b37f 100644 --- a/src/Transients.cc +++ b/src/Transients.cc @@ -173,14 +173,7 @@ Transients::get(const cache_key *key) StoreEntry *e = new StoreEntry(); e->createMemObject(); - e->mem_obj->xitTable.index = index; - e->mem_obj->xitTable.io = Store::ioReading; - anchor->exportInto(*e); - - if (EBIT_TEST(anchor->basics.flags, ENTRY_REQUIRES_COLLAPSING)) { - assert(hadWriter); - e->setCollapsingRequirement(true); - } + anchorEntry(*e, index, *anchor); // keep read lock to receive updates from others return e; @@ -244,25 +237,63 @@ Transients::addEntry(StoreEntry *e, const cache_key *key, const Store::IoStatus Must(map); // configured to track transients + if (direction == Store::ioWriting) + return addWriterEntry(*e, key); + + assert(direction == Store::ioReading); + addReaderEntry(*e, key); +} + +/// addEntry() helper used for cache entry creators/writers +void +Transients::addWriterEntry(StoreEntry &e, const cache_key *key) +{ sfileno index = 0; - Ipc::StoreMapAnchor *slot = map->openForWriting(key, index); - Must(slot); // no writer collisions + const auto anchor = map->openForWriting(key, index); + if (!anchor) + throw TextException("writer collision", Here()); // set ASAP in hope to unlock the slot if something throws - e->mem_obj->xitTable.index = index; - e->mem_obj->xitTable.io = Store::ioWriting; - - slot->set(*e, key); - if (direction == Store::ioWriting) { - // allow reading and receive remote DELETE events, but do not switch to - // the reading lock because transientReaders() callers want true readers - map->startAppending(index); - } else { - assert(direction == Store::ioReading); - // keep the entry locked (for reading) to receive remote DELETE events - map->switchWritingToReading(index); - e->mem_obj->xitTable.io = Store::ioReading; - } + // and to provide index to such methods as hasWriter() + auto &xitTable = e.mem_obj->xitTable; + xitTable.index = index; + xitTable.io = Store::ioWriting; + + anchor->set(e, key); + // allow reading and receive remote DELETE events, but do not switch to + // the reading lock because transientReaders() callers want true readers + map->startAppending(index); +} + +/// addEntry() helper used for cache readers +/// readers do not modify the cache, but they must create a Transients entry +void +Transients::addReaderEntry(StoreEntry &e, const cache_key *key) +{ + sfileno index = 0; + const auto anchor = map->openOrCreateForReading(key, index, e); + if (!anchor) + throw TextException("reader collision", Here()); + + anchorEntry(e, index, *anchor); + // keep the entry locked (for reading) to receive remote DELETE events +} + +/// fills (recently created) StoreEntry with information currently in Transients +void +Transients::anchorEntry(StoreEntry &e, const sfileno index, const Ipc::StoreMapAnchor &anchor) +{ + // set ASAP in hope to unlock the slot if something throws + // and to provide index to such methods as hasWriter() + auto &xitTable = e.mem_obj->xitTable; + xitTable.index = index; + xitTable.io = Store::ioReading; + + const auto hadWriter = hasWriter(e); // before computing collapsingRequired + anchor.exportInto(e); + const bool collapsingRequired = EBIT_TEST(anchor.basics.flags, ENTRY_REQUIRES_COLLAPSING); + assert(!collapsingRequired || hadWriter); + e.setCollapsingRequirement(collapsingRequired); } bool diff --git a/src/Transients.h b/src/Transients.h index 8d2c0280ea..0b2692101f 100644 --- a/src/Transients.h +++ b/src/Transients.h @@ -95,6 +95,9 @@ public: protected: void addEntry(StoreEntry*, const cache_key *, const Store::IoStatus); + void addWriterEntry(StoreEntry &, const cache_key *); + void addReaderEntry(StoreEntry &, const cache_key *); + void anchorEntry(StoreEntry &, const sfileno, const Ipc::StoreMapAnchor &); // Ipc::StoreMapCleaner API virtual void noteFreeMapSlice(const Ipc::StoreMapSliceId sliceId) override; diff --git a/src/ipc/StoreMap.cc b/src/ipc/StoreMap.cc index 07a07a5c80..b0f5e21695 100644 --- a/src/ipc/StoreMap.cc +++ b/src/ipc/StoreMap.cc @@ -99,6 +99,43 @@ Ipc::StoreMap::forgetWritingEntry(sfileno fileno) debugs(54, 8, "closed entry " << fileno << " for writing " << path); } +const Ipc::StoreMap::Anchor * +Ipc::StoreMap::openOrCreateForReading(const cache_key *const key, sfileno &fileno, const StoreEntry &entry) +{ + debugs(54, 5, "opening/creating entry with key " << storeKeyText(key) + << " for reading " << path); + + // start with reading so that we do not overwrite an existing unlocked entry + auto idx = fileNoByKey(key); + if (const auto anchor = openForReadingAt(idx, key)) { + fileno = idx; + return anchor; + } + + // the competing openOrCreateForReading() workers race to create a new entry + idx = fileNoByKey(key); + if (auto anchor = openForWritingAt(idx)) { + anchor->set(entry, key); + anchor->lock.switchExclusiveToShared(); + // race ended + assert(anchor->complete()); + fileno = idx; + debugs(54, 5, "switched entry " << fileno << " from writing to reading " << path); + return anchor; + } + + // we lost the above race; see if the winner-created entry is now readable + // TODO: Do some useful housekeeping work here to give the winner more time. + idx = fileNoByKey(key); + if (const auto anchor = openForReadingAt(idx, key)) { + fileno = idx; + return anchor; + } + + // slow entry creator or some other problem + return nullptr; +} + Ipc::StoreMap::Anchor * Ipc::StoreMap::openForWriting(const cache_key *const key, sfileno &fileno) { diff --git a/src/ipc/StoreMap.h b/src/ipc/StoreMap.h index b499fff0b0..0edc82ec17 100644 --- a/src/ipc/StoreMap.h +++ b/src/ipc/StoreMap.h @@ -294,6 +294,9 @@ public: /// same as closeForReading() but also frees the entry if it is unlocked void closeForReadingAndFreeIdle(const sfileno fileno); + /// openForReading() but creates a new entry if there is no old one + const Anchor *openOrCreateForReading(const cache_key *, sfileno &, const StoreEntry &); + /// writeable slice within an entry chain created by openForWriting() Slice &writeableSlice(const AnchorId anchorId, const SliceId sliceId); /// readable slice within an entry chain opened by openForReading() -- 2.47.2