From: DrDaveD <2129743+DrDaveD@users.noreply.github.com> Date: Tue, 23 Jun 2020 18:41:15 +0000 (+0000) Subject: Bug 5051: Some collapsed revalidation responses never expire (#652) X-Git-Tag: SQUID_5_0_4~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e01df3925cfadd20ab455704b4133e5a6b3c048c;p=thirdparty%2Fsquid.git Bug 5051: Some collapsed revalidation responses never expire (#652) Since negative caching support was repaired in master commit 91870bf, it has been found to last indefinitely when cache revalidation happens. New revalidation requests were collapsing on a negatively cached response forever because handleIMS() logic does not validate response freshness (still assuming that the reply came in response to the current request even though that assumption could be false since collapsed revalidation support was added in master commit 1a210de). Clearing the ENTRY_REQUIRES_COLLAPSING flag when hitting the negatively cached collapsed revalidaiton response for the first time works around this "lack of freshness check" problem. The same solution existed in the official code for positive responses. However, this solution is partial and unreliable because there is no guarantee that the clearing code will be reached (and reached while the cached response is still fresh). Also added additional partial protections against collapsing on entries abandoned by their writers, including idle hittingRequiresCollapsing() StoreEntry objects. Also fixed a tiny race condition missed in master commit d1d3b4d which addressed a much bigger (and more frequent) problem. I am not aware of any real-world cases where this race condition surfaced, but they would probably manifest in unwarranted failures to collapse. --- diff --git a/src/Transients.cc b/src/Transients.cc index 652cdc1dd4..9a0b84aefb 100644 --- a/src/Transients.cc +++ b/src/Transients.cc @@ -161,13 +161,26 @@ Transients::get(const cache_key *key) return nullptr; } + // store hadWriter before checking ENTRY_REQUIRES_COLLAPSING to avoid racing + // the writer that clears that flag and then leaves + const auto hadWriter = map->peekAtWriter(index); + if (!hadWriter && EBIT_TEST(anchor->basics.flags, ENTRY_REQUIRES_COLLAPSING)) { + debugs(20, 3, "not joining abandoned entry " << index); + map->closeForReadingAndFreeIdle(index); + return nullptr; + } + StoreEntry *e = new StoreEntry(); e->createMemObject(); e->mem_obj->xitTable.index = index; e->mem_obj->xitTable.io = Store::ioReading; anchor->exportInto(*e); - const bool collapsingRequired = EBIT_TEST(anchor->basics.flags, ENTRY_REQUIRES_COLLAPSING); - e->setCollapsingRequirement(collapsingRequired); + + if (EBIT_TEST(anchor->basics.flags, ENTRY_REQUIRES_COLLAPSING)) { + assert(hadWriter); + e->setCollapsingRequirement(true); + } + // keep read lock to receive updates from others return e; } diff --git a/src/client_side_reply.cc b/src/client_side_reply.cc index caa889b665..a75ab31f51 100644 --- a/src/client_side_reply.cc +++ b/src/client_side_reply.cc @@ -377,6 +377,10 @@ clientReplyContext::sendClientUpstreamResponse() { StoreIOBuffer tempresult; removeStoreReference(&old_sc, &old_entry); + + if (collapsedRevalidation) + http->storeEntry()->clearPublicKeyScope(); + /* here the data to send is the data we just received */ tempBuffer.offset = 0; old_reqsize = 0; @@ -453,6 +457,9 @@ clientReplyContext::handleIMSReply(StoreIOBuffer result) const auto &new_rep = http->storeEntry()->mem().freshestReply(); const auto status = new_rep.sline.status(); + // XXX: Disregard stale incomplete (i.e. still being written) borrowed (i.e. + // not caused by our request) IMS responses. That new_rep may be very old! + // origin replied 304 if (status == Http::scNotModified) { http->logType.update(LOG_TCP_REFRESH_UNMODIFIED); @@ -490,10 +497,6 @@ clientReplyContext::handleIMSReply(StoreIOBuffer result) http->logType.update(LOG_TCP_REFRESH_MODIFIED); debugs(88, 3, "origin replied " << status << ", replacing existing entry and forwarding to client"); - - if (collapsedRevalidation) - http->storeEntry()->clearPublicKeyScope(); - sendClientUpstreamResponse(); } } diff --git a/src/store/Controller.cc b/src/store/Controller.cc index ef18b8a85e..7b264a0278 100644 --- a/src/store/Controller.cc +++ b/src/store/Controller.cc @@ -278,7 +278,8 @@ Store::Controller::dereferenceIdle(StoreEntry &e, bool wantsLocalMemory) bool keepInStoreTable = false; // keep only if somebody needs it there - /* Notify the fs that we're not referencing this object any more */ + // Notify the fs that we are not referencing this object any more. This + // should be done even if we overwrite keepInStoreTable afterwards. if (e.hasDisk()) keepInStoreTable = swapDir->dereference(e) || keepInStoreTable; @@ -296,6 +297,14 @@ Store::Controller::dereferenceIdle(StoreEntry &e, bool wantsLocalMemory) keepInStoreTable = wantsLocalMemory || keepInStoreTable; } + if (e.hittingRequiresCollapsing()) { + // If we were writing this now-locally-idle entry, then we did not + // finish and should now destroy an incomplete entry. Otherwise, do not + // leave this idle StoreEntry behind because handleIMSReply() lacks + // freshness checks when hitting a collapsed revalidation entry. + keepInStoreTable = false; // may overrule fs decisions made above + } + return keepInStoreTable; } @@ -321,6 +330,28 @@ Store::Controller::hasReadableDiskEntry(const StoreEntry &e) const return swapDir->hasReadableEntry(e); } +/// flags problematic entries before find() commits to finalizing/returning them +void +Store::Controller::checkFoundCandidate(const StoreEntry &entry) const +{ + checkTransients(entry); + + // The "hittingRequiresCollapsing() has an active writer" checks below + // protect callers from getting stuck and/or from using a stale revalidation + // reply. However, these protections are not reliable because the writer may + // disappear at any time and/or without a trace. Collapsing adds risks... + if (entry.hittingRequiresCollapsing()) { + if (entry.hasTransients()) { + // Too late to check here because the writer may be gone by now, but + // Transients do check when they setCollapsingRequirement(). + } else { + // a local writer must hold a lock on its writable entry + if (!(entry.locked() && entry.isAccepting())) + throw TextException("no local writer", Here()); + } + } +} + StoreEntry * Store::Controller::find(const cache_key *key) { @@ -328,7 +359,7 @@ Store::Controller::find(const cache_key *key) try { if (!entry->key) allowSharing(*entry, key); - checkTransients(*entry); + checkFoundCandidate(*entry); entry->touch(); referenceBusy(*entry); return entry; @@ -351,6 +382,8 @@ Store::Controller::allowSharing(StoreEntry &entry, const cache_key *key) addReading(&entry, key); if (entry.hasTransients()) { + // store hadWriter before computing `found`; \see Transients::get() + const auto hadWriter = transients->hasWriter(entry); bool inSync = false; const bool found = anchorToCache(entry, inSync); if (found && !inSync) @@ -362,7 +395,7 @@ Store::Controller::allowSharing(StoreEntry &entry, const cache_key *key) throw TextException("transients entry missing ENTRY_REQUIRES_COLLAPSING", Here()); } - if (!transients->hasWriter(entry)) { + if (!hadWriter) { // prevent others from falling into the same trap throw TextException("unattached transients entry missing writer", Here()); } diff --git a/src/store/Controller.h b/src/store/Controller.h index 5a1a06d01e..58094a9bc4 100644 --- a/src/store/Controller.h +++ b/src/store/Controller.h @@ -152,6 +152,7 @@ private: bool keepForLocalMemoryCache(StoreEntry &e) const; bool anchorToCache(StoreEntry &e, bool &inSync); void checkTransients(const StoreEntry &) const; + void checkFoundCandidate(const StoreEntry &) const; Disks *swapDir; ///< summary view of all disk caches Memory *sharedMemStore; ///< memory cache that multiple workers can use